npm - @strav/brain - Versions diffs - 0.4.31 → 1.0.0-alpha.8 - Mend

@strav/brain 0.4.31 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/package.json +17 -20
package/src/brain_config.ts +72 -0
package/src/brain_error.ts +29 -0
package/src/brain_manager.ts +113 -132
package/src/brain_provider.ts +81 -6
package/src/index.ts +27 -43
package/src/provider.ts +48 -0
package/src/providers/anthropic_provider.ts +192 -246
package/src/thread.ts +99 -0
package/src/types.ts +101 -246
package/CHANGELOG.md +0 -44
package/README.md +0 -121
package/src/agent.ts +0 -93
package/src/helpers.ts +0 -1082
package/src/mcp_toolbox.ts +0 -62
package/src/memory/context_budget.ts +0 -120
package/src/memory/index.ts +0 -17
package/src/memory/memory_manager.ts +0 -168
package/src/memory/semantic_memory.ts +0 -89
package/src/memory/strategies/sliding_window.ts +0 -20
package/src/memory/strategies/summarize.ts +0 -157
package/src/memory/thread_store.ts +0 -56
package/src/memory/token_counter.ts +0 -101
package/src/memory/types.ts +0 -68
package/src/providers/google_provider.ts +0 -496
package/src/providers/openai_provider.ts +0 -569
package/src/providers/openai_responses_provider.ts +0 -321
package/src/tool.ts +0 -51
package/src/utils/error_scrub.ts +0 -5
package/src/utils/prompt.ts +0 -65
package/src/utils/retry.ts +0 -104
package/src/utils/schema.ts +0 -27
package/src/utils/sse_parser.ts +0 -62
package/src/workflow.ts +0 -199
package/tsconfig.json +0 -5

package/src/types.ts CHANGED Viewed

@@ -1,275 +1,130 @@
-// ── JSON Schema ──────────────────────────────────────────────────────────────
-/** Minimal recursive JSON Schema type. */
-export type JsonSchema = Record<string, unknown>
-// ── SSE ──────────────────────────────────────────────────────────────────────
-export interface SSEEvent {
-  event?: string
-  data: string
-}
-// ── Usage ────────────────────────────────────────────────────────────────────
-export interface Usage {
-  inputTokens: number
-  outputTokens: number
-  totalTokens: number
-}
+/**
+ * Public types for the brain runtime.
+ *
+ * Apps work with three high-level shapes:
+ *
+ *   - `Message` — a user/assistant turn in a conversation. `content` is
+ *     either a plain string or a list of `ContentBlock`s for richer
+ *     payloads (cached blocks, images in a later slice).
+ *
+ *   - `ChatOptions` — per-call knobs: model selection (explicit `model`
+ *     or `tier` sugar), `system` prompt with optional cache flag,
+ *     `maxTokens`, `thinking`, `effort`, etc.
+ *
+ *   - `ChatResult` — what comes back from `chat()`: assistant `text`,
+ *     `usage` (including cache hit/miss counters), `stopReason`, the
+ *     `model` that actually answered, and a `raw` escape hatch with the
+ *     provider's native response.
+ *
+ * The streaming side adds `StreamEvent` — a discriminated union of the
+ * events a provider emits while a response is being generated. V1
+ * covers text deltas, the final-message event, and `stopReason`;
+ * thinking blocks / tool-use streams land when those features ship.
+ */
-// ── Messages ─────────────────────────────────────────────────────────────────
+/** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
+export type ModelTier = 'fast' | 'balanced' | 'powerful'
-export interface ToolCall {
-  id: string
-  name: string
-  arguments: Record<string, unknown>
+/**
+ * A text content block. The `cache` flag lets apps mark long, stable
+ * prefixes for prompt caching; providers translate this to whatever
+ * cache mechanism their SDK exposes (Anthropic: `cache_control:
+ * {type: 'ephemeral'}`).
+ */
+export interface TextBlock {
+  type: 'text'
+  text: string
+  /** Mark this block as a cache breakpoint. Default `false`. */
+  cache?: boolean
 }
-export interface ContentBlock {
-  type: 'text' | 'tool_use' | 'tool_result'
-  text?: string
-  id?: string
-  name?: string
-  input?: Record<string, unknown>
-  toolUseId?: string
-  content?: string
-}
+export type ContentBlock = TextBlock
+/** A single conversation turn. `content` can be a bare string or a typed block list. */
 export interface Message {
-  role: 'user' | 'assistant' | 'tool'
+  role: 'user' | 'assistant'
   content: string | ContentBlock[]
-  toolCalls?: ToolCall[]
-  toolCallId?: string
-}
-// ── Tool Definition ──────────────────────────────────────────────────────────
-export interface ToolDefinition {
-  name: string
-  description: string
-  parameters: JsonSchema
-  execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
-}
-// ── Completion Request / Response ────────────────────────────────────────────
-export interface CompletionRequest {
-  model: string
-  messages: Message[]
-  system?: string
-  tools?: ToolDefinition[]
-  toolChoice?: 'auto' | 'required' | { name: string }
-  maxTokens?: number
-  temperature?: number
-  schema?: JsonSchema
-  stopSequences?: string[]
-}
-export interface CompletionResponse {
-  id: string
-  content: string
-  toolCalls: ToolCall[]
-  stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
-  usage: Usage
-  raw: unknown
-}
-// ── Streaming ────────────────────────────────────────────────────────────────
-export interface StreamChunk {
-  type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
-  text?: string
-  toolCall?: Partial<ToolCall>
-  toolIndex?: number
-  usage?: Usage
-}
-// ── Output Schema ────────────────────────────────────────────────────────────
-/** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
-export interface OutputSchema {
-  parse?: (data: unknown) => unknown
-  [key: string]: unknown
-}
-// ── Agent ────────────────────────────────────────────────────────────────────
-export interface ToolCallRecord {
-  name: string
-  arguments: Record<string, unknown>
-  result: unknown
-  duration: number
-}
-export interface AgentResult<T = any> {
-  data: T
-  text: string
-  toolCalls: ToolCallRecord[]
-  messages: Message[]
-  usage: Usage
-  iterations: number
 }
-export interface AgentEvent {
-  type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
-  text?: string
-  toolCall?: ToolCallRecord
-  iteration?: number
-  result?: AgentResult
-  suspended?: SuspendedRun
-}
-// ── Suspend / Resume ─────────────────────────────────────────────────────────
 /**
- * A JSON-serializable snapshot of an agent loop at the moment it suspended.
- *
- * All fields are plain data — no functions, class instances, or cycles — so
- * the snapshot can be stringified, stored across a process boundary, and
- * later passed to `AgentRunner.resume()` to continue the run.
+ * The `system` prompt. Either a plain string (no cache) or a structured
+ * form that lets apps mark the prompt as cached. Apps that want
+ * fine-grained control over multi-block system prompts pass an array.
  */
-export interface SerializedAgentState {
-  messages: Message[]
-  allToolCalls: ToolCallRecord[]
-  totalUsage: Usage
-  iterations: number
-}
+export type SystemPrompt =
+  | string
+  | { text: string; cache?: boolean }
+  | Array<{ text: string; cache?: boolean }>
 /**
- * Result of an agent run that was suspended before executing one or more
- * tool calls. The integrator is expected to obtain tool results out-of-band
- * (human approval, external system, queued job, etc.) and call
- * `AgentRunner.resume(state, toolResults)` to continue.
- *
- * `pendingToolCalls` contains the pending call that triggered suspension
- * plus any subsequent tool calls from the same batch that have not been
- * executed. Results must be supplied for each of them on resume so the
- * conversation remains well-formed for the provider.
+ * Per-call options. Generics are deliberately conservative — apps
+ * don't usually need to type-narrow the provider response; the `raw`
+ * escape hatch in `ChatResult` is what they reach for when they need
+ * provider-specific fields.
  */
-export interface SuspendedRun {
-  status: 'suspended'
-  pendingToolCalls: ToolCall[]
-  state: SerializedAgentState
-}
-/** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
-export interface ToolCallResult {
-  toolCallId: string
-  result: unknown
-}
-// ── Workflow ──────────────────────────────────────────────────────────────────
-export interface WorkflowResult {
-  results: Record<string, AgentResult>
-  usage: Usage
-  duration: number
-}
-// ── Embedding ────────────────────────────────────────────────────────────────
-export interface EmbeddingResponse {
-  embeddings: number[][]
-  model: string
-  usage: { totalTokens: number }
-}
-// ── Transcription (Speech-to-Text) ───────────────────────────────────────────
-export interface TranscribeRequest {
-  /** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
-  audio: Uint8Array | Blob
-  /**
-   * MIME type of the audio. Required for providers that infer format from
-   * the multipart filename or rely on it for inline base64 (Gemini).
-   * Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
-   * 'audio/webm', 'audio/flac'.
-   */
-  contentType?: string
-  /** Override the provider's default STT model. */
+export interface ChatOptions {
+  /** Override the configured default model. Wins over `tier`. */
   model?: string
+  /** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
+  tier?: ModelTier
+  /** System prompt — typed shape supports prompt caching. */
+  system?: SystemPrompt
+  /** Hard ceiling on response tokens. Default `4096`. */
+  maxTokens?: number
   /**
-   * BCP-47 language hint (e.g. 'th', 'en', 'zh'). Whisper accepts ISO-639-1
-   * ('th'); Gemini uses BCP-47. Both improve accuracy when set; omit for
-   * auto-detection.
+   * Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
+   * (or omission) turns it off. On Opus 4.7, Opus 4.6, and Sonnet 4.6 this
+   * is the only supported thinking mode — `budget_tokens` is removed
+   * upstream and not exposed here.
    */
-  language?: string
+  thinking?: 'adaptive' | 'disabled'
+  /** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
+  effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
   /**
-   * Optional priming prompt — gives the model vocabulary or context to
-   * bias toward (proper nouns, brand names, menu items, dialect markers).
-   * Whisper uses this directly; Gemini incorporates it into the system
-   * instruction.
+   * Top-level cache_control toggle. When `true` the provider asks the
+   * SDK to auto-cache the last cacheable block on every request.
+   * Defaults to `config.brain.cache.auto ?? false`.
    */
-  prompt?: string
+  cache?: boolean
   /**
-   * Filename to send in the multipart form (Whisper). Used to derive the
-   * audio format on the server when `contentType` is missing. Defaults to
-   * 'audio.bin' if not provided.
+   * Beta features to enable on this request. Passed through to the
+   * provider's beta-header machinery. Provider-specific.
    */
-  filename?: string
-}
-export interface TranscriptionResponse {
-  /** Transcribed text. */
-  text: string
-  /** Detected language, when the provider reports one. */
-  language?: string
-  /** Audio duration in seconds, when the provider reports one. */
-  duration?: number
-  /** Original provider response for callers that need provider-specific fields. */
-  raw: unknown
-}
-// ── Provider ─────────────────────────────────────────────────────────────────
-export interface AIProvider {
-  readonly name: string
-  complete(request: CompletionRequest): Promise<CompletionResponse>
-  stream(request: CompletionRequest): AsyncIterable<StreamChunk>
-  embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
+  betas?: readonly string[]
   /**
-   * Transcribe audio to text. Implemented by providers that expose a
-   * speech-to-text endpoint (OpenAI Whisper, Google Gemini's multimodal
-   * generateContent). Throws or remains undefined for providers without
-   * STT (Anthropic at time of writing).
+   * Provider override. `BrainManager.chat` normally selects the
+   * provider from config; set this to use a different one for this call.
    */
-  transcribe?(request: TranscribeRequest): Promise<TranscriptionResponse>
+  provider?: string
 }
-// ── Hooks ────────────────────────────────────────────────────────────────────
-export type BeforeHook = (request: CompletionRequest) => void | Promise<void>
-export type AfterHook = (
-  request: CompletionRequest,
-  response: CompletionResponse
-) => void | Promise<void>
-// ── Config ───────────────────────────────────────────────────────────────────
-export interface ProviderConfig {
-  driver: string
-  apiKey: string
-  model: string
-  baseUrl?: string
-  maxTokens?: number
-  temperature?: number
-  maxRetries?: number
-  retryBaseDelay?: number
+/** Token usage for a single call. The cache read/creation counters are populated when caching is in play. */
+export interface ChatUsage {
+  inputTokens: number
+  outputTokens: number
+  cacheReadTokens: number
+  cacheCreationTokens: number
 }
-export interface BrainConfig {
-  default: string
-  providers: Record<string, ProviderConfig>
-  maxTokens: number
-  temperature: number
-  maxIterations: number
-  memory?: import('./memory/types.ts').MemoryConfig
+/**
+ * The provider's reply. `text` is the concatenated assistant text;
+ * `raw` is the provider's full native response shape for apps that
+ * need anything we don't surface (e.g. citation blocks, server-tool
+ * results once those ship).
+ */
+export interface ChatResult<Raw = unknown> {
+  text: string
+  model: string
+  stopReason: string | null
+  usage: ChatUsage
+  raw: Raw
 }
-// ── Serialized Thread ────────────────────────────────────────────────────────
-export interface SerializedThread {
-  messages: Message[]
-  system?: string
-}
+/**
+ * Streaming event union. V1 covers the text-delta + completion path
+ * apps want for chat-style UIs; thinking blocks and tool-use streams
+ * are reserved for later slices.
+ */
+export type StreamEvent =
+  | { type: 'text'; delta: string }
+  | { type: 'stop'; stopReason: string | null; usage: ChatUsage }

package/CHANGELOG.md DELETED Viewed

@@ -1,44 +0,0 @@
-# Changelog
-## 0.2.12
-### Added
-- **GoogleProvider** — Support for Google's Gemini models
-  - Native Gemini API integration using `generativelanguage.googleapis.com`
-  - Support for completion, streaming, function calling, and embeddings
-  - Models: `gemini-2.0-flash`, `gemini-2.5-flash`, `gemini-3-pro-preview`
-  - Authentication via `x-goog-api-key` header
-  - Zero new dependencies — uses raw `fetch()` following existing patterns
-  - Comprehensive test suite with 29 tests covering all functionality
-## 0.6.0
-### Added
-- **Memory management** — three-tier conversation memory system for long-running threads
-  - `thread.memory()` enables opt-in context window management
-  - **Working memory** — recent messages within token budget
-  - **Episodic memory** — LLM-generated summaries of compacted older messages
-  - **Semantic memory** — structured facts extracted from conversation, injected into system prompt
-- `TokenCounter` — approximate token estimation per provider (~4 chars/token)
-- `ContextBudget` — budget allocation across system prompt, summaries, facts, and working messages
-- `MemoryManager` — orchestrates compaction and fact extraction
-- `SemanticMemory` — in-memory fact store with `<known_facts>` prompt injection
-- `SummarizeStrategy` — LLM-powered compaction with optional fact extraction
-- `SlidingWindowStrategy` — drop oldest messages without summarization
-- `InMemoryThreadStore` — default `ThreadStore` implementation for dev/testing
-- `ThreadStore` interface — pluggable persistence (implement for database-backed storage)
-- `BrainManager.useThreadStore()` — register a thread store for persistence
-- `BrainManager.memoryConfig` / `BrainManager.threadStore` — accessors for memory configuration
-- `thread.id()` — set thread identifier for persistence
-- `thread.persist()` — enable auto-save to ThreadStore after each `send()`
-- `thread.facts` / `thread.episodicSummary` — access memory state
-- `thread.serializeMemory()` / `thread.restoreMemory()` — extended serialization with memory state
-- `BrainConfig.memory` — optional `MemoryConfig` field for global memory settings
-## 0.1.1
-### Changed
-- Applied consistent code formatting across all source files

package/README.md DELETED Viewed

@@ -1,121 +0,0 @@
-# @strav/brain
-AI module for the [Strav](https://www.npmjs.com/package/@strav/core) framework. Provides a unified interface for AI providers with support for agents, threads, tool use, and multi-step workflows.
-## Install
-```bash
-bun add @strav/brain
-```
-Requires `@strav/core` as a peer dependency.
-## Providers
-- **Anthropic** (Claude)
-- **OpenAI** (GPT, also works with DeepSeek via custom `baseUrl`)
-- **Google** (Gemini)
-## Usage
-```ts
-import { brain } from '@strav/brain'
-// One-shot chat
-const response = await brain.chat('Explain quantum computing')
-// Streaming
-for await (const chunk of brain.stream('Write a poem')) {
-  process.stdout.write(chunk.text)
-}
-// Structured output with Zod
-import { z } from 'zod'
-const result = await brain.generate('List 3 colors', {
-  schema: z.object({ colors: z.array(z.string()) }),
-})
-// Embeddings
-const vectors = await brain.embed('Hello world')
-```
-## Tools
-Define tools that AI agents can use:
-```ts
-import { defineTool } from '@strav/brain'
-import { z } from 'zod'
-const searchTool = defineTool({
-  name: 'search',
-  description: 'Search the database',
-  parameters: z.object({ query: z.string() }),
-  execute: async ({ query }, context) => {
-    const userId = context?.userId
-    return await db.search(query, { userId })
-  },
-})
-```
-The `execute` function receives two parameters:
-- `args` - The parsed and validated tool arguments
-- `context` - Optional context object passed from the agent runner
-## Agents
-```ts
-import { Agent, defineTool } from '@strav/brain'
-class ResearchAgent extends Agent {
-  provider = 'anthropic'
-  model = 'claude-sonnet-4-20250514'
-  instructions = 'You are a research assistant.'
-  tools = [searchTool, summarizeTool]
-}
-// Google Gemini agent
-class GeminiResearchAgent extends Agent {
-  provider = 'google'
-  model = 'gemini-2.0-flash'
-  instructions = 'You are a research assistant powered by Gemini.'
-  tools = [searchTool, summarizeTool]
-}
-// Run agent with context
-const runner = brain.agent(ResearchAgent)
-runner.context({ userId: '123' }) // Pass context to tools
-const result = await runner.input('Find info on Bun').run()
-```
-## Threads
-Multi-turn conversations with serialization support:
-```ts
-const thread = brain.thread({ provider: 'anthropic', model: 'claude-sonnet-4-20250514' })
-await thread.send('Hello')
-await thread.send('Tell me more')
-const saved = thread.serialize() // persist and restore later
-// Google Gemini example
-const geminiThread = brain.thread({ provider: 'google', model: 'gemini-2.0-flash' })
-await geminiThread.send('Explain quantum computing')
-```
-## Workflows
-Orchestrate multi-agent pipelines:
-```ts
-const workflow = brain.workflow()
-  .step('research', ResearchAgent)
-  .step('summarize', SummaryAgent)
-  .parallel('review', [FactCheckAgent, StyleAgent])
-const result = await workflow.run('Analyze this topic')
-```
-## License
-MIT

package/src/agent.ts DELETED Viewed

@@ -1,93 +0,0 @@
-import type {
-  ToolDefinition,
-  ToolCall,
-  ToolCallRecord,
-  AgentResult,
-  OutputSchema,
-} from './types.ts'
-/**
- * Base class for AI agents.
- *
- * Extend this class to define an agent with custom instructions,
- * tools, structured output, and lifecycle hooks.
- *
- * @example
- * class SupportAgent extends Agent {
- *   provider = 'anthropic'
- *   model = 'claude-sonnet-4-5-20250929'
- *   instructions = 'You are a customer support agent.'
- *   tools = [searchTool, lookupOrderTool]
- *
- *   output = z.object({
- *     reply: z.string(),
- *     category: z.enum(['billing', 'shipping', 'product', 'other']),
- *   })
- *
- *   onToolCall(call: ToolCall) {
- *     console.log(`Calling tool: ${call.name}`)
- *   }
- * }
- */
-export abstract class Agent {
-  /** Provider name (e.g., 'anthropic', 'openai'). Falls back to config default. */
-  provider?: string
-  /** Model identifier. Falls back to the provider's configured default model. */
-  model?: string
-  /** System prompt / instructions for this agent. Supports `{{key}}` context interpolation. */
-  instructions: string = ''
-  /** Tools available to this agent during execution. */
-  tools?: ToolDefinition[]
-  /** Structured output schema (Zod or JSON Schema). When set, the final response is parsed and validated. */
-  output?: OutputSchema
-  /** Maximum tool-use loop iterations before forcing a stop. Falls back to config default (10). */
-  maxIterations?: number
-  /** Maximum tokens per completion request. Falls back to config default (4096). */
-  maxTokens?: number
-  /** Temperature for completion requests. Falls back to config default (0.7). */
-  temperature?: number
-  // ── Lifecycle hooks (optional) ───────────────────────────────────────────
-  /** Called before the first completion request. */
-  onStart?(input: string, context: Record<string, unknown>): void | Promise<void>
-  /** Called when the model requests a tool call, before execution. */
-  onToolCall?(call: ToolCall): void | Promise<void>
-  /**
-   * Called before a tool is executed. Return `true` to suspend the agent loop
-   * before running this tool call; the runner will return a `SuspendedRun`
-   * with a JSON-serializable snapshot of the loop state. Resume later via
-   * `AgentRunner.resume(state, toolResults)` once the tool result is known.
-   *
-   * This is a policy-free primitive: the framework does not attach meaning
-   * to suspension. Integrators can use it to gate mutating tools on human
-   * approval, dispatch a tool to an external worker, rate-limit, etc.
-   *
-   * When suspension occurs mid-batch, the triggering call and any remaining
-   * unprocessed calls in the same batch are captured together in
-   * `pendingToolCalls` so the provider's tool_use/tool_result contract stays
-   * balanced on resume.
-   */
-  shouldSuspend?(
-    call: ToolCall,
-    context: Record<string, unknown>
-  ): boolean | Promise<boolean>
-  /** Called after a tool finishes execution. */
-  onToolResult?(call: ToolCallRecord): void | Promise<void>
-  /** Called when the agent run completes successfully. */
-  onComplete?(result: AgentResult): void | Promise<void>
-  /** Called when the agent run encounters an error. */
-  onError?(error: Error): void | Promise<void>
-}