npm - @reactive-agents/llm-provider - Versions diffs - 0.5.0 → 0.5.5 - Mend

@reactive-agents/llm-provider 0.5.0 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -3,243 +3,807 @@ import * as effect_Cause from 'effect/Cause';
 import * as effect_Types from 'effect/Types';
 import * as effect_Duration from 'effect/Duration';
-declare const LLMProviderType: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "custom"]>;
+/**
+ * Schema for LLM provider selection.
+ * Supported providers: anthropic, openai, ollama, gemini, litellm, custom.
+ *
+ * @example
+ * ```typescript
+ * const provider: LLMProvider = "anthropic";
+ * ```
+ */
+declare const LLMProviderType: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
+/**
+ * Union of supported LLM provider names.
+ * - "anthropic": Claude models via Anthropic API
+ * - "openai": GPT models via OpenAI API
+ * - "ollama": Local models via Ollama
+ * - "gemini": Google Gemini models
+ * - "litellm": LiteLLM proxy (40+ model providers)
+ * - "custom": User-defined provider adapter
+ */
 type LLMProvider = Schema.Schema.Type<typeof LLMProviderType>;
+/**
+ * Schema for embedding model configuration.
+ * Embeddings are used for semantic caching, memory similarity search, and verification.
+ * Anthropic provides no embeddings API; embeddings always route to OpenAI or Ollama.
+ *
+ * @example
+ * ```typescript
+ * const config: EmbeddingConfig = {
+ *   model: "text-embedding-3-small",
+ *   dimensions: 1536,
+ *   provider: "openai",
+ *   batchSize: 100
+ * };
+ * ```
+ */
 declare const EmbeddingConfigSchema: Schema.Struct<{
+    /** Embedding model name (e.g., "text-embedding-3-small") */
     model: typeof Schema.String;
+    /** Output embedding vector dimensionality */
     dimensions: typeof Schema.Number;
+    /** Provider hosting the embedding model */
     provider: Schema.Literal<["openai", "ollama"]>;
+    /** Maximum vectors to embed in a single API call (default: 100) */
     batchSize: Schema.optional<typeof Schema.Number>;
 }>;
+/**
+ * Embedding configuration type.
+ * Specifies the embedding model and provider for semantic operations.
+ */
 type EmbeddingConfig = Schema.Schema.Type<typeof EmbeddingConfigSchema>;
+/**
+ * Default embedding configuration.
+ * Uses OpenAI's text-embedding-3-small with 1536 dimensions.
+ *
+ * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
+ */
 declare const DefaultEmbeddingConfig: EmbeddingConfig;
+/**
+ * Schema for LLM model configuration options.
+ * Includes provider, model name, and optional sampling/output parameters.
+ *
+ * @example
+ * ```typescript
+ * const config: ModelConfig = {
+ *   provider: "anthropic",
+ *   model: "claude-opus-4-20250514",
+ *   maxTokens: 4096,
+ *   temperature: 0.7
+ * };
+ * ```
+ */
 declare const ModelConfigSchema: Schema.Struct<{
-    provider: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "custom"]>;
+    /** LLM provider identifier */
+    provider: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
+    /** Model name/identifier for the provider */
     model: typeof Schema.String;
+    /** Maximum tokens in response (optional) */
     maxTokens: Schema.optional<typeof Schema.Number>;
+    /** Sampling temperature 0.0-1.0 (optional) */
     temperature: Schema.optional<typeof Schema.Number>;
+    /** Top-p (nucleus) sampling probability (optional) */
     topP: Schema.optional<typeof Schema.Number>;
+    /** Stop sequences to halt generation (optional) */
     stopSequences: Schema.optional<Schema.Array$<typeof Schema.String>>;
 }>;
+/**
+ * LLM model configuration type.
+ * Specifies which LLM to use and how to configure its behavior.
+ */
 type ModelConfig = Schema.Schema.Type<typeof ModelConfigSchema>;
+/**
+ * Pre-configured model profiles for popular LLMs.
+ * Each preset includes cost estimates, context window, and quality tiers.
+ * Quality tier: 0.0 (low) to 1.0 (highest).
+ * Cost: per 1 million input/output tokens in USD.
+ *
+ * @example
+ * ```typescript
+ * const preset = ModelPresets["claude-opus"];
+ * // { provider: "anthropic", model: "claude-opus-4-20250514", costPer1MInput: 15.0, ... }
+ * ```
+ */
 declare const ModelPresets: {
+    /**
+     * Claude 3.5 Haiku — fast, cost-effective Anthropic model.
+     * Best for low-latency, simple reasoning tasks; not recommended for complex analysis.
+     */
     readonly "claude-haiku": {
         readonly provider: "anthropic";
         readonly model: "claude-3-5-haiku-20241022";
+        /** Cost per 1 million input tokens in USD */
         readonly costPer1MInput: 1;
+        /** Cost per 1 million output tokens in USD */
         readonly costPer1MOutput: 5;
+        /** Maximum context window in tokens */
         readonly maxContext: 200000;
+        /** Quality tier (0.6 = reliable for simple tasks) */
         readonly quality: 0.6;
     };
+    /**
+     * Claude Sonnet 4 — balanced Anthropic model.
+     * Recommended for general-purpose reasoning, tool use, and production agents.
+     */
     readonly "claude-sonnet": {
         readonly provider: "anthropic";
         readonly model: "claude-sonnet-4-20250514";
         readonly costPer1MInput: 3;
         readonly costPer1MOutput: 15;
         readonly maxContext: 200000;
+        /** Quality tier (0.85 = excellent reasoning) */
         readonly quality: 0.85;
     };
+    /**
+     * Claude Sonnet 4.5 — latest Anthropic model.
+     * Superior reasoning over Sonnet 4; recommended for complex multi-step reasoning.
+     */
     readonly "claude-sonnet-4-5": {
         readonly provider: "anthropic";
         readonly model: "claude-sonnet-4-5-20250929";
         readonly costPer1MInput: 3;
         readonly costPer1MOutput: 15;
         readonly maxContext: 200000;
+        /** Quality tier (0.9 = very strong reasoning) */
         readonly quality: 0.9;
     };
+    /**
+     * Claude Opus 4 — most capable Anthropic model.
+     * Best for complex analysis, research, and high-accuracy multi-hop reasoning.
+     * Largest context window among the Anthropic presets (1M tokens, matched by the Gemini presets); highest cost.
+     */
     readonly "claude-opus": {
         readonly provider: "anthropic";
         readonly model: "claude-opus-4-20250514";
         readonly costPer1MInput: 15;
         readonly costPer1MOutput: 75;
         readonly maxContext: 1000000;
+        /** Quality tier (1.0 = frontier-class reasoning) */
         readonly quality: 1;
     };
+    /**
+     * GPT-4o Mini — fast, low-cost OpenAI model.
+     * Good for simple tasks and high-throughput scenarios.
+     */
     readonly "gpt-4o-mini": {
         readonly provider: "openai";
         readonly model: "gpt-4o-mini";
         readonly costPer1MInput: 0.15;
         readonly costPer1MOutput: 0.6;
         readonly maxContext: 128000;
+        /** Quality tier (0.55 = capable but less reliable for complex reasoning) */
         readonly quality: 0.55;
     };
+    /**
+     * GPT-4o — latest OpenAI flagship model.
+     * Strong reasoning, multimodal support; recommended for tool use and complex analysis.
+     */
     readonly "gpt-4o": {
         readonly provider: "openai";
         readonly model: "gpt-4o";
         readonly costPer1MInput: 2.5;
         readonly costPer1MOutput: 10;
         readonly maxContext: 128000;
+        /** Quality tier (0.8 = very good reasoning) */
         readonly quality: 0.8;
     };
+    /**
+     * Gemini 2.0 Flash — fast Google model.
+     * Excellent speed and cost efficiency; large 1M context window.
+     */
     readonly "gemini-2.0-flash": {
         readonly provider: "gemini";
         readonly model: "gemini-2.0-flash";
         readonly costPer1MInput: 0.1;
         readonly costPer1MOutput: 0.4;
         readonly maxContext: 1000000;
+        /** Quality tier (0.75 = good reasoning) */
         readonly quality: 0.75;
     };
+    /**
+     * Gemini 2.5 Pro Preview — advanced Google model.
+     * Superior reasoning to Flash; large context window and competitive pricing.
+     */
     readonly "gemini-2.5-pro": {
         readonly provider: "gemini";
         readonly model: "gemini-2.5-pro-preview-03-25";
         readonly costPer1MInput: 1.25;
         readonly costPer1MOutput: 10;
         readonly maxContext: 1000000;
+        /** Quality tier (0.95 = excellent reasoning) */
         readonly quality: 0.95;
     };
 };
+/**
+ * Union of all model preset names.
+ * Use to select a pre-configured model with cost/quality/context metadata.
+ *
+ * @example
+ * ```typescript
+ * const presetName: ModelPresetName = "claude-opus";
+ * const preset = ModelPresets[presetName];
+ * ```
+ */
 type ModelPresetName = keyof typeof ModelPresets;
+/**
+ * Schema for Anthropic prompt caching control.
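+ * Currently only supports the "ephemeral" type: a short-lived cache that expires after a
+ * provider-defined TTL (5 minutes by default per Anthropic's documentation), not a per-request cache.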
+ * Non-Anthropic providers silently ignore cache_control directives.
+ *
+ * @example
+ * ```typescript
+ * const cacheControl: CacheControl = { type: "ephemeral" };
+ * ```
+ */
 declare const CacheControlSchema: Schema.Struct<{
+    /** Cache type: "ephemeral" for short-lived prompt caching (expires after a provider-defined TTL) */
     type: Schema.Literal<["ephemeral"]>;
 }>;
+/**
+ * Anthropic prompt caching configuration.
+ * Wraps text content blocks to enable prompt caching optimization.
+ * Reduces costs for repeated context; only supported on Anthropic provider.
+ */
 type CacheControl = Schema.Schema.Type<typeof CacheControlSchema>;
+/**
+ * Schema for image source reference.
+ * Supports base64-encoded or URL-referenced images in PNG, JPEG, GIF, or WebP format.
+ *
+ * @example
+ * ```typescript
+ * const source: ImageSource = {
+ *   type: "base64",
+ *   media_type: "image/png",
+ *   data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
+ * };
+ * ```
+ */
 declare const ImageSourceSchema: Schema.Struct<{
+    /** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
     type: Schema.Literal<["base64", "url"]>;
+    /** MIME type of image: PNG, JPEG, GIF, or WebP */
     media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
+    /** Either base64-encoded data or HTTPS URL */
     data: typeof Schema.String;
 }>;
+/**
+ * Image source reference type.
+ * Either a base64-encoded image or an HTTPS URL to an image resource.
+ */
 type ImageSource = Schema.Schema.Type<typeof ImageSourceSchema>;
+/**
+ * Schema for text content blocks.
+ * Supports optional Anthropic prompt caching via cache_control.
+ *
+ * @example
+ * ```typescript
+ * const textBlock: TextContentBlock = {
+ *   type: "text",
+ *   text: "This is a text message"
+ * };
+ * ```
+ */
 declare const TextContentBlockSchema: Schema.Struct<{
+    /** Content type identifier */
     type: Schema.Literal<["text"]>;
+    /** Text content */
     text: typeof Schema.String;
+    /** Optional Anthropic cache control directive */
     cache_control: Schema.optional<Schema.Struct<{
+        /** Cache type: "ephemeral" for short-lived prompt caching (expires after a provider-defined TTL) */
         type: Schema.Literal<["ephemeral"]>;
     }>>;
 }>;
+/**
+ * Schema for image content blocks.
+ *
+ * @example
+ * ```typescript
+ * const imageBlock: ImageContentBlock = {
+ *   type: "image",
+ *   source: { type: "url", media_type: "image/png", data: "https://..." }
+ * };
+ * ```
+ */
 declare const ImageContentBlockSchema: Schema.Struct<{
+    /** Content type identifier */
     type: Schema.Literal<["image"]>;
+    /** Image source reference */
     source: Schema.Struct<{
+        /** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
         type: Schema.Literal<["base64", "url"]>;
+        /** MIME type of image: PNG, JPEG, GIF, or WebP */
         media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
+        /** Either base64-encoded data or HTTPS URL */
         data: typeof Schema.String;
     }>;
 }>;
+/**
+ * Schema for tool use content blocks (model invoking a tool).
+ *
+ * @example
+ * ```typescript
+ * const toolBlock: ToolUseContentBlock = {
+ *   type: "tool_use",
+ *   id: "toolu_123",
+ *   name: "file-read",
+ *   input: { path: "./output.txt" }
+ * };
+ * ```
+ */
 declare const ToolUseContentBlockSchema: Schema.Struct<{
+    /** Content type identifier */
     type: Schema.Literal<["tool_use"]>;
+    /** Unique tool call identifier */
     id: typeof Schema.String;
+    /** Tool name being invoked */
     name: typeof Schema.String;
+    /** Tool parameters (JSON-compatible object) */
     input: typeof Schema.Unknown;
 }>;
+/**
+ * Schema for tool result content blocks (system returning tool output).
+ *
+ * @example
+ * ```typescript
+ * const resultBlock: ToolResultContentBlock = {
+ *   type: "tool_result",
+ *   tool_use_id: "toolu_123",
+ *   content: "File contents..."
+ * };
+ * ```
+ */
 declare const ToolResultContentBlockSchema: Schema.Struct<{
+    /** Content type identifier */
     type: Schema.Literal<["tool_result"]>;
+    /** ID of tool call this result corresponds to */
     tool_use_id: typeof Schema.String;
+    /** Tool result/output content */
     content: typeof Schema.String;
 }>;
+/**
+ * Union of all content block types used in LLM messages.
+ * Content blocks allow mixing text, images, tool invocations, and tool results.
+ *
+ * @example
+ * ```typescript
+ * const blocks: readonly ContentBlock[] = [
+ *   { type: "text", text: "Analyze this image:" },
+ *   { type: "image", source: { type: "url", media_type: "image/png", data: "https://..." } }
+ * ];
+ * ```
+ */
 type ContentBlock = {
+    /** Text content (optionally cached with Anthropic) */
     readonly type: "text";
     readonly text: string;
     readonly cache_control?: CacheControl;
 } | {
+    /** Image content */
     readonly type: "image";
     readonly source: ImageSource;
 } | {
+    /** Model invoking a tool */
     readonly type: "tool_use";
     readonly id: string;
     readonly name: string;
     readonly input: unknown;
 } | {
+    /** System returning tool output */
     readonly type: "tool_result";
     readonly tool_use_id: string;
     readonly content: string;
 };
+/**
+ * Text content block with cache control enabled.
+ * Used when text context should be cached for cost reduction (Anthropic only).
+ * Non-Anthropic providers silently ignore the cache_control directive.
+ *
+ * @example
+ * ```typescript
+ * const cached: CacheableContentBlock = {
+ *   type: "text",
+ *   text: "Expensive context (system prompt, instructions, etc)",
+ *   cache_control: { type: "ephemeral" }
+ * };
+ * ```
+ */
 type CacheableContentBlock = {
+    /** Always "text" */
     readonly type: "text";
+    /** Cached text content */
     readonly text: string;
+    /** Cache control directive (always ephemeral) */
     readonly cache_control: CacheControl;
 };
 /**
- * Helper — wrap text in a cacheable content block.
- * Non-Anthropic providers silently ignore `cache_control`.
+ * Wrap plain text in a cacheable content block.
+ * Enables Anthropic prompt caching for the given text (no-op for other providers).
+ * Useful for repeated context like system prompts, instructions, or reference documents.
+ *
+ * @param text - The text to cache
+ * @returns A content block with ephemeral cache control enabled
+ *
+ * @example
+ * ```typescript
+ * const cached = makeCacheable("You are a helpful assistant...");
+ * // Returns: { type: "text", text: "...", cache_control: { type: "ephemeral" } }
+ * ```
  */
 declare const makeCacheable: (text: string) => CacheableContentBlock;
+/**
+ * Union of LLM message roles.
+ * Each message has a role (system, user, assistant, tool) and content.
+ *
+ * - **system**: Instructions/context set by the agent developer. Content is always a string.
+ * - **user**: User query or context provided by caller. Content is string or content blocks.
+ * - **assistant**: Model response or thoughts. Content is string or content blocks (including tool_use).
+ * - **tool**: Tool execution result returned to model. Content is always string.
+ *
+ * @example
+ * ```typescript
+ * const messages: readonly LLMMessage[] = [
+ *   { role: "system", content: "You are a helpful assistant." },
+ *   { role: "user", content: "What is 2+2?" },
+ *   { role: "assistant", content: "2+2 equals 4." }
+ * ];
+ *
+ * const withTools: readonly LLMMessage[] = [
+ *   { role: "user", content: "Read the file." },
+ *   {
+ *     role: "assistant",
+ *     content: [
+ *       { type: "text", text: "I'll read that file for you." },
+ *       { type: "tool_use", id: "toolu_1", name: "file-read", input: { path: "./data.txt" } }
+ *     ]
+ *   },
+ *   { role: "tool", toolCallId: "toolu_1", content: "File contents here..." }
+ * ];
+ * ```
+ */
 type LLMMessage = {
+    /** System prompt/instructions — context set by developer */
     readonly role: "system";
+    /** Plain text string only (no content blocks) */
     readonly content: string;
 } | {
+    /** User input/query */
     readonly role: "user";
+    /** Plain text or multimodal content blocks */
     readonly content: string | readonly ContentBlock[];
 } | {
+    /** Model response or reasoning */
     readonly role: "assistant";
+    /** Plain text or multimodal content blocks (including tool_use) */
     readonly content: string | readonly ContentBlock[];
 } | {
+    /** Tool execution result */
     readonly role: "tool";
+    /** Tool call ID this result corresponds to */
     readonly toolCallId: string;
+    /** Plain text result/output */
     readonly content: string;
 };
+/**
+ * Schema for token usage statistics from an LLM response.
+ * Used for cost tracking, budget enforcement, and observability.
+ *
+ * @example
+ * ```typescript
+ * const usage: TokenUsage = {
+ *   inputTokens: 1200,
+ *   outputTokens: 450,
+ *   totalTokens: 1650,
+ *   estimatedCost: 0.0045
+ * };
+ * ```
+ */
 declare const TokenUsageSchema: Schema.Struct<{
+    /** Tokens consumed by the input (messages + system prompt) */
     inputTokens: typeof Schema.Number;
+    /** Tokens generated in the response */
     outputTokens: typeof Schema.Number;
+    /** Sum of input and output tokens */
     totalTokens: typeof Schema.Number;
+    /** Estimated cost in USD based on provider pricing */
     estimatedCost: typeof Schema.Number;
 }>;
+/**
+ * Token usage from an LLM response.
+ * Tracks input/output tokens separately for cost calculation.
+ */
 type TokenUsage = Schema.Schema.Type<typeof TokenUsageSchema>;
+/**
+ * Schema for LLM response termination reason.
+ * Indicates why the model stopped generating tokens.
+ *
+ * @example
+ * ```typescript
+ * const reason: StopReason = "end_turn"; // Model concluded naturally
+ * const reason2: StopReason = "max_tokens"; // Hit output limit
+ * ```
+ */
 declare const StopReasonSchema: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
+/**
+ * Reason the LLM stopped generating.
+ *
+ * - **end_turn**: Model concluded naturally — response is complete.
+ * - **max_tokens**: Hit configured output token limit — response may be truncated.
+ * - **stop_sequence**: Hit a configured stop sequence — generation halted by design.
+ * - **tool_use**: Model is invoking a tool — `toolCalls` array is populated.
+ */
 type StopReason = Schema.Schema.Type<typeof StopReasonSchema>;
+/**
+ * Schema for tool definitions.
+ * Describes tools available to the LLM, including name, description, and input schema.
+ * Tools are passed to the LLM for function calling / tool use.
+ *
+ * @example
+ * ```typescript
+ * const tool: ToolDefinition = {
+ *   name: "file-read",
+ *   description: "Read a file from disk",
+ *   inputSchema: {
+ *     path: { type: "string", description: "File path", required: true }
+ *   }
+ * };
+ * ```
+ */
 declare const ToolDefinitionSchema: Schema.Struct<{
+    /** Tool identifier (used by model to invoke the tool) */
     name: typeof Schema.String;
+    /** Human-readable tool description for the model */
     description: typeof Schema.String;
+    /** Input schema describing expected parameters (JSON Schema format) */
     inputSchema: Schema.Record$<typeof Schema.String, typeof Schema.Unknown>;
 }>;
+/**
+ * Tool definition.
+ * Used to register available functions that the LLM can call.
+ * Input schema is a JSON Schema object defining parameters.
+ */
 type ToolDefinition = Schema.Schema.Type<typeof ToolDefinitionSchema>;
+/**
+ * Schema for tool invocation.
+ * Emitted by the model when it decides to call a tool.
+ *
+ * @example
+ * ```typescript
+ * const call: ToolCall = {
+ *   id: "toolu_123",
+ *   name: "file-read",
+ *   input: { path: "./output.txt" }
+ * };
+ * ```
+ */
 declare const ToolCallSchema: Schema.Struct<{
+    /** Unique tool call identifier (generated by model) */
     id: typeof Schema.String;
+    /** Tool name to invoke */
     name: typeof Schema.String;
+    /** Tool input parameters (arbitrary JSON-compatible object) */
     input: typeof Schema.Unknown;
 }>;
+/**
+ * Tool invocation from the LLM.
+ * When the model decides to call a tool, this describes which tool and with what inputs.
+ */
 type ToolCall = Schema.Schema.Type<typeof ToolCallSchema>;
+/**
+ * Request to the LLM for a completion.
+ * Includes messages, model configuration, tool definitions, and sampling parameters.
+ * Passed to LLMService.complete() for synchronous LLM calls.
+ *
+ * @see CompletionResponse — the response type returned by LLMService.complete()
+ * @see ToolDefinition — shape of entries in the `tools` array
+ * @see ModelConfig — shape of the `model` field
+ *
+ * @example
+ * ```typescript
+ * const request: CompletionRequest = {
+ *   messages: [
+ *     { role: "system", content: "You are a helpful assistant." },
+ *     { role: "user", content: "What is the capital of France?" }
+ *   ],
+ *   model: { provider: "anthropic", model: "claude-opus-4-20250514" },
+ *   maxTokens: 1024,
+ *   temperature: 0.7,
+ *   tools: [
+ *     { name: "web-search", description: "Search the web", inputSchema: { query: { type: "string" } } }
+ *   ]
+ * };
+ * ```
+ */
 type CompletionRequest = {
+    /** Conversation history (at least 1 message required) */
     readonly messages: readonly LLMMessage[];
+    /** Model config (provider + model name + optional sampling params) */
     readonly model?: ModelConfig;
+    /** Maximum response tokens (optional, uses config default if omitted) */
     readonly maxTokens?: number;
+    /** Sampling temperature 0.0-1.0 (optional, uses config default if omitted) */
     readonly temperature?: number;
+    /** Stop sequences to halt generation (optional) */
     readonly stopSequences?: readonly string[];
+    /** Tools available for the model to call (optional) */
     readonly tools?: readonly ToolDefinition[];
+    /** System prompt (optional, prepended to user messages) */
     readonly systemPrompt?: string;
 };
+/**
+ * Schema for LLM response.
+ * Contains the generated content, stop reason, token usage, and any tool calls.
+ *
+ * @example
+ * ```typescript
+ * const response: CompletionResponse = {
+ *   content: "The capital of France is Paris.",
+ *   stopReason: "end_turn",
+ *   usage: { inputTokens: 120, outputTokens: 15, totalTokens: 135, estimatedCost: 0.00041 },
+ *   model: "claude-opus-4-20250514",
+ *   toolCalls: undefined
+ * };
+ * ```
+ */
 declare const CompletionResponseSchema: Schema.Struct<{
+    /** Generated response content (text only, no content blocks) */
     content: typeof Schema.String;
+    /** Why the model stopped generating */
     stopReason: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
+    /** Token usage statistics */
     usage: Schema.Struct<{
+        /** Tokens consumed by the input (messages + system prompt) */
         inputTokens: typeof Schema.Number;
+        /** Tokens generated in the response */
         outputTokens: typeof Schema.Number;
+        /** Sum of input and output tokens */
         totalTokens: typeof Schema.Number;
+        /** Estimated cost in USD based on provider pricing */
         estimatedCost: typeof Schema.Number;
     }>;
+    /** Actual model identifier used (may differ from request) */
     model: typeof Schema.String;
+    /** Tool calls emitted by the model (if any) */
     toolCalls: Schema.optional<Schema.Array$<Schema.Struct<{
+        /** Unique tool call identifier (generated by model) */
         id: typeof Schema.String;
+        /** Tool name to invoke */
         name: typeof Schema.String;
+        /** Tool input parameters (arbitrary JSON-compatible object) */
         input: typeof Schema.Unknown;
     }>>>;
 }>;
+/**
+ * LLM response to a completion request.
+ * Contains generated text, stop reason, usage metrics, and optional tool calls.
+ *
+ * @see CompletionRequest — the request type passed to LLMService.complete()
+ * @see StopReason — possible values for the `stopReason` field
+ * @see TokenUsage — shape of the `usage` field
+ * @see ToolCall — shape of entries in the optional `toolCalls` array
+ */
 type CompletionResponse = Schema.Schema.Type<typeof CompletionResponseSchema>;
+/**
+ * Events streamed during an LLM response.
+ * Used when streaming responses rather than waiting for full completion.
+ * Events arrive in sequence: text_delta(s), then tool_use_start/delta(s) if applicable, then content_complete, then usage.
+ *
+ * @example
+ * ```typescript
+ * const events: StreamEvent[] = [
+ *   { type: "text_delta", text: "The " },
+ *   { type: "text_delta", text: "capital " },
+ *   { type: "text_delta", text: "is Paris." },
+ *   { type: "content_complete", content: "The capital is Paris." },
+ *   { type: "usage", usage: { inputTokens: 50, outputTokens: 10, totalTokens: 60, estimatedCost: 0.00018 } }
+ * ];
+ * ```
+ */
 type StreamEvent = {
+    /** Text chunk arriving */
     readonly type: "text_delta";
+    /** Text chunk content */
     readonly text: string;
 } | {
+    /** Tool invocation starting */
     readonly type: "tool_use_start";
+    /** Unique tool call ID */
     readonly id: string;
+    /** Tool name being invoked */
     readonly name: string;
 } | {
+    /** Tool input parameter chunk arriving */
     readonly type: "tool_use_delta";
+    /** JSON parameter chunk (accumulated to form full input) */
     readonly input: string;
 } | {
+    /** Content generation completed */
     readonly type: "content_complete";
+    /** Full accumulated response content */
     readonly content: string;
 } | {
+    /** Token usage reported */
     readonly type: "usage";
+    /** Final token usage for the request */
     readonly usage: TokenUsage;
 } | {
+    /** Error occurred during streaming */
     readonly type: "error";
+    /** Error message */
     readonly error: string;
 };
+/**
+ * Completion request with structured output validation.
+ * Extends CompletionRequest to require the model output conform to a schema.
+ * Used when the agent needs guaranteed JSON schema output from the LLM.
+ *
+ * @see CompletionRequest — base request type this extends
+ *
+ * @typeParam A - The type that the LLM output must conform to
+ *
+ * @example
+ * ```typescript
+ * interface Decision {
+ *   readonly choice: "yes" | "no";
+ *   readonly confidence: number;
+ * }
+ *
+ * const request: StructuredCompletionRequest<Decision> = {
+ *   messages: [{ role: "user", content: "Should I approve this?" }],
+ *   outputSchema: Schema.Struct({
+ *     choice: Schema.Literal("yes", "no"),
+ *     confidence: Schema.Number
+ *   }),
+ *   retryOnParseFail: true,
+ *   maxParseRetries: 2
+ * };
+ * ```
+ */
 type StructuredCompletionRequest<A> = CompletionRequest & {
+    /** Schema that the LLM response must conform to */
     readonly outputSchema: Schema.Schema<A>;
+    /** If true, retry with corrected prompt if parse fails (default: false) */
     readonly retryOnParseFail?: boolean;
+    /** Maximum parse retry attempts before giving up (default: 1) */
     readonly maxParseRetries?: number;
 };
-type TruncationStrategy = "drop-oldest" | "summarize-middle" | "sliding-window" | "importance-based";
-type ObservabilityVerbosity = "metadata" | "full";
+/**
+ * Strategy for truncating context when it exceeds token budget.
+ * Used by ContextWindowManager when compacting message history for token limits.
+ *
+ * @example
+ * ```typescript
+ * const strategy: TruncationStrategy = "summarize-middle";
+ * ```
+ */
+type TruncationStrategy =
+/** Remove oldest messages first (FIFO). Fastest; may lose early context. */
+"drop-oldest"
+/** Summarize middle messages, preserving system prompt and most recent turns. */
+ | "summarize-middle"
+/** Keep only the most recent N messages; drops all prior history. */
+ | "sliding-window"
+/** Use heuristics to score and drop least-important messages first. */
+ | "importance-based";
+/**
+ * Observability verbosity level for LLM request events.
+ * Controls what is captured in each `LLMRequestEvent` published to the EventBus.
+ *
+ * @default "full"
+ *
+ * @example
+ * ```typescript
+ * const config = LLMConfig.of({
+ *   // ... other fields
+ *   observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
+ * });
+ * ```
+ */
+type ObservabilityVerbosity =
+/** Capture timing, token counts, and cost only — lightweight, production-safe. */
+"metadata"
+/** Capture complete request/response payloads — higher overhead, useful for debugging. */
+ | "full";
 declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
     readonly _tag: "LLMError";
@@ -349,78 +913,297 @@ declare class LLMService extends LLMService_base {
 }
 declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
+    /**
+     * Default LLM provider.
+     * Used as fallback when a request does not specify a provider.
+     *
+     * @default "anthropic"
+     */
     readonly defaultProvider: LLMProvider;
+    /**
+     * Default LLM model identifier.
+     * Used as fallback when a request does not specify a model.
+     *
+     * @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
+     */
     readonly defaultModel: string;
+    /**
+     * Anthropic API key.
+     * Retrieved from ANTHROPIC_API_KEY environment variable.
+     * Required if provider is "anthropic".
+     *
+     * @default From ANTHROPIC_API_KEY env var (undefined if not set)
+     */
     readonly anthropicApiKey?: string;
+    /**
+     * OpenAI API key.
+     * Retrieved from OPENAI_API_KEY environment variable.
+     * Required if provider is "openai".
+     *
+     * @default From OPENAI_API_KEY env var (undefined if not set)
+     */
     readonly openaiApiKey?: string;
+    /**
+     * Google API key.
+     * Retrieved from GOOGLE_API_KEY environment variable.
+     * Required if provider is "gemini".
+     *
+     * @default From GOOGLE_API_KEY env var (undefined if not set)
+     */
     readonly googleApiKey?: string;
+    /**
+     * Ollama server endpoint.
+     * Retrieved from OLLAMA_ENDPOINT environment variable.
+     * Used for local model serving.
+     *
+     * @default "http://localhost:11434"
+     */
     readonly ollamaEndpoint?: string;
     /**
-     * Embedding configuration. Anthropic has no embeddings API;
-     * embeddings route to OpenAI (default) or Ollama.
-     * This is the SOLE embedding config for the entire framework.
+     * Embedding configuration — model, provider, dimensions.
+     * Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
+     * This is the sole embedding config for the entire framework.
+     * Used by semantic cache, memory similarity search, and verification layers.
+     *
+     * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
      */
     readonly embeddingConfig: EmbeddingConfig;
     /**
      * Enable Anthropic prompt caching.
-     * When true, memory context injections are wrapped in
-     * `cache_control: { type: "ephemeral" }` blocks.
+     * When true, memory context injections and system prompts are wrapped in
+     * `cache_control: { type: "ephemeral" }` blocks to reduce costs.
+     * Non-Anthropic providers silently ignore cache control directives.
+     * Automatically set to true if defaultModel starts with "claude".
+     *
+     * @default true if defaultModel starts with "claude", false otherwise
      */
     readonly supportsPromptCaching: boolean;
+    /**
+     * Maximum number of retries for transient LLM request failures.
+     * Applied with exponential backoff (2^n seconds between attempts).
+     *
+     * @default 3
+     */
     readonly maxRetries: number;
+    /**
+     * Request timeout in milliseconds.
+     * LLM requests exceeding this duration are aborted.
+     *
+     * @default 30000 (30 seconds)
+     */
     readonly timeoutMs: number;
+    /**
+     * Default maximum output tokens for LLM responses.
+     * Used if a CompletionRequest does not specify maxTokens.
+     * Set lower for faster responses; higher for longer outputs.
+     *
+     * @default 4096
+     */
     readonly defaultMaxTokens: number;
+    /**
+     * Default sampling temperature (0.0-1.0).
+     * Used if a CompletionRequest does not specify temperature.
+     * 0.0 = deterministic; 1.0 = maximum randomness.
+     *
+     * @default 0.7 (good balance of creativity and coherence)
+     */
     readonly defaultTemperature: number;
     /**
      * LLM request/response observability verbosity.
-     * "full" captures the complete request and response payloads.
-     * "metadata" captures only timing/token counts (cheaper for production).
-     * Default: "full" — always capture everything during development.
+     * Determines what data is captured in LLMRequestEvent for observability.
+     *
+     * - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
+     * - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
+     *
+     * @default "full" (capture everything)
+     *
+     * @example
+     * ```typescript
+     * // Production: metadata only; any other environment: full details
+     * observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
+     * ```
      */
     readonly observabilityVerbosity: ObservabilityVerbosity;
 }>;
 /**
- * LLM configuration — provided via environment or config file.
+ * LLM service configuration.
+ * Provides API keys, default model settings, timeouts, and observability verbosity.
+ * Typically constructed from environment variables via llmConfigFromEnv.
+ *
+ * @example
+ * ```typescript
+ * const config = LLMConfig.of({
+ *   ...llmConfigFromEnv, // supplies the remaining required fields
+ *   defaultProvider: "anthropic",
+ *   defaultModel: "claude-opus-4-20250514",
+ *   anthropicApiKey: process.env.ANTHROPIC_API_KEY,
+ *   maxRetries: 3, timeoutMs: 30000
+ * });
+ * ```
  */
 declare class LLMConfig extends LLMConfig_base {
 }
 /**
- * Raw LLMConfig value from environment variables.
- * Exported so callers can spread overrides (e.g. model) on top.
+ * Raw LLMConfig object constructed from environment variables.
+ * Reads all config from process.env with sensible defaults.
+ * Exported so callers can spread overrides (e.g. change model) on top.
+ *
+ * Environment variables:
+ * - LLM_DEFAULT_MODEL: Model identifier (default: claude-sonnet-4-20250514)
+ * - ANTHROPIC_API_KEY: Anthropic API key
+ * - OPENAI_API_KEY: OpenAI API key
+ * - GOOGLE_API_KEY: Google API key
+ * - OLLAMA_ENDPOINT: Ollama server URL (default: http://localhost:11434)
+ * - EMBEDDING_MODEL: Embedding model name (default: text-embedding-3-small)
+ * - EMBEDDING_DIMENSIONS: Embedding vector dimensions (default: 1536)
+ * - EMBEDDING_PROVIDER: Embedding provider (default: openai)
+ * - LLM_MAX_RETRIES: Retry attempts (default: 3)
+ * - LLM_TIMEOUT_MS: Request timeout in ms (default: 30000)
+ * - LLM_DEFAULT_TEMPERATURE: Sampling temperature (default: 0.7)
+ * - LLM_OBSERVABILITY_VERBOSITY: "full" or "metadata" (default: full)
+ *
+ * @example
+ * ```typescript
+ * // Use defaults from environment
+ * const config = llmConfigFromEnv;
+ *
+ * // Override specific fields
+ * const customConfig = LLMConfig.of({
+ *   ...llmConfigFromEnv,
+ *   defaultModel: "gpt-4o",
+ *   defaultProvider: "openai"
+ * });
+ * ```
  */
 declare const llmConfigFromEnv: {
+    /**
+     * Default LLM provider.
+     * Used as fallback when a request does not specify a provider.
+     *
+     * @default "anthropic"
+     */
     readonly defaultProvider: LLMProvider;
+    /**
+     * Default LLM model identifier.
+     * Used as fallback when a request does not specify a model.
+     *
+     * @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
+     */
     readonly defaultModel: string;
+    /**
+     * Anthropic API key.
+     * Retrieved from ANTHROPIC_API_KEY environment variable.
+     * Required if provider is "anthropic".
+     *
+     * @default From ANTHROPIC_API_KEY env var (undefined if not set)
+     */
     readonly anthropicApiKey?: string;
+    /**
+     * OpenAI API key.
+     * Retrieved from OPENAI_API_KEY environment variable.
+     * Required if provider is "openai".
+     *
+     * @default From OPENAI_API_KEY env var (undefined if not set)
+     */
     readonly openaiApiKey?: string;
+    /**
+     * Google API key.
+     * Retrieved from GOOGLE_API_KEY environment variable.
+     * Required if provider is "gemini".
+     *
+     * @default From GOOGLE_API_KEY env var (undefined if not set)
+     */
     readonly googleApiKey?: string;
+    /**
+     * Ollama server endpoint.
+     * Retrieved from OLLAMA_ENDPOINT environment variable.
+     * Used for local model serving.
+     *
+     * @default "http://localhost:11434"
+     */
     readonly ollamaEndpoint?: string;
     /**
-     * Embedding configuration. Anthropic has no embeddings API;
-     * embeddings route to OpenAI (default) or Ollama.
-     * This is the SOLE embedding config for the entire framework.
+     * Embedding configuration — model, provider, dimensions.
+     * Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
+     * This is the sole embedding config for the entire framework.
+     * Used by semantic cache, memory similarity search, and verification layers.
+     *
+     * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
      */
     readonly embeddingConfig: EmbeddingConfig;
     /**
      * Enable Anthropic prompt caching.
-     * When true, memory context injections are wrapped in
-     * `cache_control: { type: "ephemeral" }` blocks.
+     * When true, memory context injections and system prompts are wrapped in
+     * `cache_control: { type: "ephemeral" }` blocks to reduce costs.
+     * Non-Anthropic providers silently ignore cache control directives.
+     * Automatically set to true if defaultModel starts with "claude".
+     *
+     * @default true if defaultModel starts with "claude", false otherwise
      */
     readonly supportsPromptCaching: boolean;
+    /**
+     * Maximum number of retries for transient LLM request failures.
+     * Applied with exponential backoff (2^n seconds between attempts).
+     *
+     * @default 3
+     */
     readonly maxRetries: number;
+    /**
+     * Request timeout in milliseconds.
+     * LLM requests exceeding this duration are aborted.
+     *
+     * @default 30000 (30 seconds)
+     */
     readonly timeoutMs: number;
+    /**
+     * Default maximum output tokens for LLM responses.
+     * Used if a CompletionRequest does not specify maxTokens.
+     * Set lower for faster responses; higher for longer outputs.
+     *
+     * @default 4096
+     */
     readonly defaultMaxTokens: number;
+    /**
+     * Default sampling temperature (0.0-1.0).
+     * Used if a CompletionRequest does not specify temperature.
+     * 0.0 = deterministic; 1.0 = maximum randomness.
+     *
+     * @default 0.7 (good balance of creativity and coherence)
+     */
     readonly defaultTemperature: number;
     /**
      * LLM request/response observability verbosity.
-     * "full" captures the complete request and response payloads.
-     * "metadata" captures only timing/token counts (cheaper for production).
-     * Default: "full" — always capture everything during development.
+     * Determines what data is captured in LLMRequestEvent for observability.
+     *
+     * - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
+     * - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
+     *
+     * @default "full" (capture everything)
+     *
+     * @example
+     * ```typescript
+     * // Production: metadata only; any other environment: full details
+     * observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
+     * ```
      */
     readonly observabilityVerbosity: ObservabilityVerbosity;
 };
 /**
- * Build LLMConfig from environment variables.
+ * Effect-TS Layer that provides LLMConfig from environment variables.
+ * Use this layer to automatically populate LLMConfig from process.env.
+ * Can be overridden with a custom layer for testing or custom configuration.
+ *
+ * @example
+ * ```typescript
+ * const effect = Effect.gen(function* () {
+ *   const config = yield* LLMConfig;
+ *   console.log(config.defaultModel);
+ * }).pipe(Effect.provide(LLMConfigFromEnv));
+ *
+ * Effect.runPromise(effect);
+ * ```
+ *
+ * @see llmConfigFromEnv
  */
 declare const LLMConfigFromEnv: Layer.Layer<LLMConfig, never, never>;
@@ -462,6 +1245,8 @@ declare const LocalProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
 declare const GeminiProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
+declare const LiteLLMProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
 /**
  * Create a deterministic test LLM service.
  * Returns responses based on pattern matching against prompt content.
@@ -580,10 +1365,10 @@ type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
  * Create the LLM provider layer for a specific provider.
  * Uses env vars for configuration by default.
  */
-declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
+declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
 /**
  * LLM layer with custom config (for programmatic use).
  */
-declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini") => Layer.Layer<LLMService | PromptManager, never, never>;
+declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
-export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
+export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };