npm - @reactive-agents/llm-provider - Versions diffs - 0.6.3 → 0.7.5 - Mend

@reactive-agents/llm-provider 0.6.3 → 0.7.5

This diff compares the contents of two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (5)

package/README.md CHANGED

@@ -7,7 +7,7 @@ Provides a unified `LLMService` interface with adapters for Anthropic, OpenAI, G
 ## Installation
 ```bash
-bun add @reactive-agents/llm-provider effect
+bun add @reactive-agents/llm-provider
 ```
 Install the SDK for your chosen provider:
@@ -20,20 +20,23 @@ bun add @google/genai              # Google Gemini
 ## Supported Providers
-| Provider | Models | Streaming | Embeddings | Structured Output |
-|----------|--------|-----------|------------|------------------|
-| `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓ | — | ✓ |
-| `openai` | gpt-4o, gpt-4o-mini, o1-* | ✓ | ✓ | ✓ |
-| `gemini` | gemini-2.0-flash, gemini-2.5-pro | ✓ | ✓ | ✓ |
-| `ollama` | any local model | ✓ | ✓ | ✓ |
-| `test` | deterministic mock | ✓ | ✓ | — |
+| Provider    | Models                                   | Streaming | Embeddings | Structured Output |
+| ----------- | ---------------------------------------- | --------- | ---------- | ----------------- |
+| `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓         | —          | ✓                 |
+| `openai`    | gpt-4o, gpt-4o-mini, o1-\*               | ✓         | ✓          | ✓                 |
+| `gemini`    | gemini-2.0-flash, gemini-2.5-pro         | ✓         | ✓          | ✓                 |
+| `ollama`    | any local model                          | ✓         | ✓          | ✓                 |
+| `test`      | deterministic mock                       | ✓         | ✓          | —                 |
 ## Usage
 ### Anthropic
 ```typescript
-import { createLLMProviderLayer, LLMService } from "@reactive-agents/llm-provider";
+import {
+  createLLMProviderLayer,
+  LLMService,
+} from "@reactive-agents/llm-provider";
 import { Effect } from "effect";
 const layer = createLLMProviderLayer("anthropic");
@@ -51,7 +54,10 @@ const result = await Effect.runPromise(
 ### Google Gemini
 ```typescript
-import { createLLMProviderLayer, LLMService } from "@reactive-agents/llm-provider";
+import {
+  createLLMProviderLayer,
+  LLMService,
+} from "@reactive-agents/llm-provider";
 import { Effect } from "effect";
 // Set GOOGLE_API_KEY in your environment

package/dist/index.d.ts CHANGED

@@ -910,7 +910,7 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
      * Anthropic has no embeddings API — routes to OpenAI or Ollama
      * per LLMConfig.embeddingConfig.
      */
-    readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly number[][], LLMErrors>;
+    readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
     /**
      * Count tokens for a set of messages.
      * Used for context window management.
@@ -1306,7 +1306,7 @@ declare const TestLLMServiceLayer: (responses?: Record<string, string>) => Layer
 /**
  * Estimate token count for messages.
- * Uses a simple heuristic: ~4 characters per token for English text.
+ * Uses content-aware heuristics: ~3 chars/token for code/JSON, ~4 for English text.
  * This is used as a fallback when the provider's token counting API is unavailable.
  */
 declare const estimateTokenCount: (messages: readonly LLMMessage[]) => Effect.Effect<number, never>;
@@ -1321,6 +1321,12 @@ declare const calculateCost: (inputTokens: number, outputTokens: number, model:
  * Only retries on rate limit and timeout errors.
  */
 declare const retryPolicy: Schedule.Schedule<[number, effect_Duration.Duration], LLMErrors, never>;
+type CircuitBreakerConfig = {
+    readonly failureThreshold: number;
+    readonly cooldownMs: number;
+    readonly halfOpenRequests: number;
+};
+declare const defaultCircuitBreakerConfig: CircuitBreakerConfig;
 /**
  * Schema for ReAct action parsing.
@@ -1420,10 +1426,54 @@ declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "olla
     thinking?: boolean;
     temperature?: number;
     maxTokens?: number;
-}) => Layer.Layer<LLMService | PromptManager, never, never>;
+}, circuitBreaker?: Partial<CircuitBreakerConfig>) => Layer.Layer<LLMService | PromptManager, never, never>;
 /**
  * LLM layer with custom config (for programmatic use).
  */
 declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
-export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };
+/**
+ * Content-hash embedding cache — deduplicates embed() calls per text.
+ * Cache is keyed by Bun.hash(text) and avoids re-embedding identical strings.
+ */
+interface EmbeddingCache {
+    /** Wrap an embed function with content-hash deduplication. */
+    readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
+    /** Number of cached embeddings. */
+    readonly size: () => number;
+    /** Clear all cached entries. */
+    readonly clear: () => void;
+}
+/**
+ * Create an embedding cache that wraps an underlying embed function.
+ * Each text is hashed individually; only cache-misses are sent to the LLM.
+ */
+declare const makeEmbeddingCache: (underlying: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>) => EmbeddingCache;
+/**
+ * Circuit Breaker — prevents cascading failures by fast-failing when
+ * the underlying LLM provider is consistently erroring.
+ *
+ * States: CLOSED (normal) → OPEN (fast-fail) → HALF_OPEN (test one request)
+ */
+type State = "closed" | "open" | "half_open";
+interface CircuitBreaker {
+    /** Wrap an Effect with circuit breaker protection. */
+    readonly protect: <A>(effect: Effect.Effect<A, LLMErrors>) => Effect.Effect<A, LLMErrors>;
+    /** Current state. */
+    readonly state: () => State;
+    /** Reset to closed. */
+    readonly reset: () => void;
+}
+/**
+ * Create a circuit breaker with configurable thresholds.
+ *
+ * - After `failureThreshold` consecutive failures → OPEN (fast-fail).
+ * - After `cooldownMs` → HALF_OPEN (allow one test request).
+ * - If test request succeeds → CLOSED. If it fails → OPEN again.
+ */
+declare const makeCircuitBreaker: (config?: Partial<CircuitBreakerConfig>) => CircuitBreaker;
+export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CircuitBreaker, type CircuitBreakerConfig, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingCache, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, defaultCircuitBreakerConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, makeCircuitBreaker, makeEmbeddingCache, retryPolicy };

package/dist/index.js CHANGED

@@ -1432,25 +1432,36 @@ import { Effect as Effect3, Context as Context3, Layer as Layer2 } from "effect"
 // src/token-counter.ts
 import { Effect as Effect2 } from "effect";
+function charsPerToken(text) {
+  if (text.length === 0) return 4;
+  const sample = text.slice(0, 2e3);
+  const codeSignals = (sample.match(/[{}();=<>\[\]]/g) ?? []).length;
+  const jsonSignals = (sample.match(/"\w+"\s*:/g) ?? []).length;
+  const ratio = (codeSignals + jsonSignals) / sample.length;
+  if (ratio > 0.08) return 3;
+  if (ratio > 0.04) return 3.5;
+  return 4;
+}
 var estimateTokenCount = (messages) => Effect2.sync(() => {
-  let totalChars = 0;
+  let totalTokens = 0;
   for (const msg of messages) {
     if (typeof msg.content === "string") {
-      totalChars += msg.content.length;
+      totalTokens += Math.ceil(msg.content.length / charsPerToken(msg.content));
     } else {
       for (const block of msg.content) {
         if (block.type === "text") {
-          totalChars += block.text.length;
+          totalTokens += Math.ceil(block.text.length / charsPerToken(block.text));
         } else if (block.type === "tool_result") {
-          totalChars += block.content.length;
+          totalTokens += Math.ceil(block.content.length / charsPerToken(block.content));
         } else if (block.type === "tool_use") {
-          totalChars += JSON.stringify(block.input).length;
+          const json = JSON.stringify(block.input);
+          totalTokens += Math.ceil(json.length / 3);
         }
       }
     }
-    totalChars += 16;
+    totalTokens += 4;
   }
-  return Math.ceil(totalChars / 4);
+  return totalTokens;
 });
 var calculateCost = (inputTokens, outputTokens, model) => {
   const costMap = {
@@ -1578,6 +1589,11 @@ var retryPolicy = Schedule.intersect(
     (error) => error._tag === "LLMRateLimitError" || error._tag === "LLMTimeoutError"
   )
 );
+var defaultCircuitBreakerConfig = {
+  failureThreshold: 5,
+  cooldownMs: 3e4,
+  halfOpenRequests: 1
+};
 // src/providers/anthropic.ts
 var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "system").map((m) => {
@@ -1622,6 +1638,16 @@ var toEffectError = (error, provider) => {
     cause: error
   });
 };
+var MIN_SYSTEM_CACHE_CHARS = 4096;
+var buildSystemParam = (systemPrompt) => {
+  if (!systemPrompt) return void 0;
+  if (systemPrompt.length < MIN_SYSTEM_CACHE_CHARS) return systemPrompt;
+  return [{
+    type: "text",
+    text: systemPrompt,
+    cache_control: { type: "ephemeral" }
+  }];
+};
 var AnthropicProviderLive = Layer3.effect(
   LLMService,
   Effect4.gen(function* () {
@@ -1644,7 +1670,7 @@ var AnthropicProviderLive = Layer3.effect(
             model,
             max_tokens: request.maxTokens ?? config.defaultMaxTokens,
             temperature: request.temperature ?? config.defaultTemperature,
-            system: request.systemPrompt,
+            system: buildSystemParam(request.systemPrompt),
             messages: toAnthropicMessages(request.messages),
             stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
             tools: request.tools?.map(toAnthropicTool)
@@ -1674,7 +1700,7 @@ var AnthropicProviderLive = Layer3.effect(
             model,
             max_tokens: request.maxTokens ?? config.defaultMaxTokens,
             temperature: request.temperature ?? config.defaultTemperature,
-            system: request.systemPrompt,
+            system: buildSystemParam(request.systemPrompt),
             messages: toAnthropicMessages(request.messages)
           });
           stream.on("text", (text) => {
@@ -1714,17 +1740,13 @@ var AnthropicProviderLive = Layer3.effect(
         });
       }),
       completeStructured: (request) => Effect4.gen(function* () {
-        const schemaStr = JSON.stringify(
-          Schema2.encodedSchema(request.outputSchema),
-          null,
-          2
-        );
+        const jsonSchema = Schema2.encodedSchema(request.outputSchema);
+        const schemaStr = JSON.stringify(jsonSchema, null, 2);
         const messagesWithFormat = [
           ...request.messages,
           {
             role: "user",
-            content: `
-Respond with ONLY valid JSON matching this schema:
+            content: `Respond with ONLY valid JSON matching this schema:
 ${schemaStr}
 No markdown, no code fences, just raw JSON.`
@@ -1741,9 +1763,11 @@ No markdown, no code fences, just raw JSON.`
             },
             {
               role: "user",
-              content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
+              content: `That response did not match the schema. Error: ${String(lastError)}. Please try again with valid JSON only.`
             }
           ];
+          const anthropicMsgs = toAnthropicMessages(msgs);
+          anthropicMsgs.push({ role: "assistant", content: "{" });
           const completeResult = yield* Effect4.tryPromise({
             try: () => {
               const client = getClient();
@@ -1751,8 +1775,8 @@ No markdown, no code fences, just raw JSON.`
                 model: typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel,
                 max_tokens: request.maxTokens ?? config.defaultMaxTokens,
                 temperature: request.temperature ?? config.defaultTemperature,
-                system: request.systemPrompt,
-                messages: toAnthropicMessages(msgs)
+                system: buildSystemParam(request.systemPrompt),
+                messages: anthropicMsgs
               });
             },
             catch: (error) => toEffectError(error, "anthropic")
@@ -1761,8 +1785,9 @@ No markdown, no code fences, just raw JSON.`
             completeResult,
             typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel
           );
+          const fullContent = "{" + response.content;
           try {
-            const parsed = JSON.parse(response.content);
+            const parsed = JSON.parse(fullContent);
             const decoded = Schema2.decodeUnknownEither(
               request.outputSchema
             )(parsed);
@@ -2031,49 +2056,56 @@ var OpenAIProviderLive = Layer4.effect(
         });
       }),
       completeStructured: (request) => Effect5.gen(function* () {
-        const schemaStr = JSON.stringify(
-          Schema3.encodedSchema(request.outputSchema),
-          null,
-          2
-        );
-        const messagesWithFormat = [
+        const jsonSchema = Schema3.encodedSchema(request.outputSchema);
+        const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
+        const schemaStr = JSON.stringify(schemaObj, null, 2);
+        const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
+        const client = getClient();
+        const maxRetries = request.maxParseRetries ?? 2;
+        const requestBody = {
+          model,
+          max_tokens: request.maxTokens ?? config.defaultMaxTokens,
+          temperature: request.temperature ?? config.defaultTemperature,
+          response_format: {
+            type: "json_schema",
+            json_schema: {
+              name: "structured_output",
+              strict: true,
+              schema: schemaObj
+            }
+          }
+        };
+        const messages = [
           ...request.messages,
           {
             role: "user",
-            content: `
-Respond with ONLY valid JSON matching this schema:
-${schemaStr}
-No markdown, no code fences, just raw JSON.`
+            content: `Respond with JSON matching this schema:
+${schemaStr}`
           }
         ];
         let lastError = null;
-        const maxRetries = request.maxParseRetries ?? 2;
         for (let attempt = 0; attempt <= maxRetries; attempt++) {
-          const msgs = attempt === 0 ? messagesWithFormat : [
-            ...messagesWithFormat,
+          const msgs = attempt === 0 ? messages : [
+            ...messages,
             {
               role: "assistant",
               content: String(lastError)
             },
             {
               role: "user",
-              content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
+              content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
             }
           ];
-          const client = getClient();
           const completeResult = yield* Effect5.tryPromise({
             try: () => client.chat.completions.create({
-              model: typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel,
-              max_tokens: request.maxTokens ?? config.defaultMaxTokens,
-              temperature: request.temperature ?? config.defaultTemperature,
+              ...requestBody,
               messages: toOpenAIMessages(msgs)
             }),
             catch: (error) => toEffectError2(error, "openai")
           });
           const response = mapOpenAIResponse(
             completeResult,
-            typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel
+            model
           );
           try {
             const parsed = JSON.parse(response.content);
@@ -2423,11 +2455,10 @@ var LocalProviderLive = Layer5.effect(
         });
       }),
       completeStructured: (request) => Effect6.gen(function* () {
-        const schemaStr = JSON.stringify(
-          Schema4.encodedSchema(request.outputSchema),
-          null,
-          2
-        );
+        const encodedSchema = Schema4.encodedSchema(request.outputSchema);
+        const schemaObj = JSON.parse(JSON.stringify(encodedSchema));
+        const schemaStr = JSON.stringify(schemaObj, null, 2);
+        const ollamaFormat = schemaObj && typeof schemaObj === "object" && schemaObj.properties ? schemaObj : "json";
         const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
         let lastError = null;
         const maxRetries = request.maxParseRetries ?? 2;
@@ -2473,7 +2504,7 @@ No markdown, no code fences, just raw JSON.`
                 model,
                 messages: msgs,
                 stream: false,
-                format: "json",
+                format: ollamaFormat,
                 keep_alive: "5m",
                 options: {
                   temperature: request.temperature ?? config.defaultTemperature,
@@ -2529,7 +2560,7 @@ No markdown, no code fences, just raw JSON.`
       }),
       getStructuredOutputCapabilities: () => Effect6.succeed({
         nativeJsonMode: true,
-        jsonSchemaEnforcement: false,
+        jsonSchemaEnforcement: true,
         prefillSupport: false,
         grammarConstraints: true
       })
@@ -2656,6 +2687,8 @@ var GeminiProviderLive = Layer6.effect(
       if (opts.tools?.length) {
         cfg.tools = toGeminiTools([...opts.tools]);
       }
+      if (opts.responseMimeType) cfg.responseMimeType = opts.responseMimeType;
+      if (opts.responseSchema) cfg.responseSchema = opts.responseSchema;
       return cfg;
     };
     return LLMService.of({
@@ -2754,20 +2787,20 @@ var GeminiProviderLive = Layer6.effect(
         });
       }),
       completeStructured: (request) => Effect7.gen(function* () {
-        const schemaStr = JSON.stringify(
-          Schema5.encodedSchema(request.outputSchema),
-          null,
-          2
-        );
+        const jsonSchema = Schema5.encodedSchema(request.outputSchema);
+        const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
+        const schemaStr = JSON.stringify(schemaObj, null, 2);
+        const client = yield* Effect7.promise(() => getClient());
+        let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
+        if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
+          model = GEMINI_DEFAULT_MODEL;
+        }
         const messagesWithFormat = [
           ...request.messages,
           {
             role: "user",
-            content: `
-Respond with ONLY valid JSON matching this schema:
-${schemaStr}
-No markdown, no code fences, just raw JSON.`
+            content: `Respond with JSON matching this schema:
+${schemaStr}`
           }
         ];
         let lastError = null;
@@ -2781,14 +2814,9 @@ No markdown, no code fences, just raw JSON.`
             },
             {
               role: "user",
-              content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
+              content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
             }
           ];
-          const client = yield* Effect7.promise(() => getClient());
-          let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
-          if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
-            model = GEMINI_DEFAULT_MODEL;
-          }
           const response = yield* Effect7.tryPromise({
             try: () => client.models.generateContent({
               model,
@@ -2796,7 +2824,9 @@ No markdown, no code fences, just raw JSON.`
               config: buildGeminiConfig({
                 maxTokens: request.maxTokens,
                 temperature: request.temperature,
-                systemPrompt: request.systemPrompt
+                systemPrompt: request.systemPrompt,
+                responseMimeType: "application/json",
+                responseSchema: schemaObj
               })
             }),
             catch: toEffectError3
@@ -3375,8 +3405,147 @@ var ComplexityAnalysisSchema = Schema8.Struct({
 });
 // src/runtime.ts
-import { Layer as Layer9 } from "effect";
-var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams) => {
+import { Effect as Effect12, Layer as Layer9 } from "effect";
+// src/embedding-cache.ts
+import { Effect as Effect10 } from "effect";
+var MAX_ENTRIES = 5e3;
+var makeEmbeddingCache = (underlying) => {
+  const caches = /* @__PURE__ */ new Map();
+  const getModelCache = (model) => {
+    let c = caches.get(model);
+    if (!c) {
+      c = /* @__PURE__ */ new Map();
+      caches.set(model, c);
+    }
+    return c;
+  };
+  const evictIfNeeded = (cache) => {
+    if (cache.size > MAX_ENTRIES) {
+      const evictCount = Math.floor(MAX_ENTRIES * 0.2);
+      const keys = cache.keys();
+      for (let i = 0; i < evictCount; i++) {
+        const next = keys.next();
+        if (next.done) break;
+        cache.delete(next.value);
+      }
+    }
+  };
+  return {
+    embed: (texts, model) => Effect10.gen(function* () {
+      const modelKey = model ?? "__default__";
+      const cache = getModelCache(modelKey);
+      const results = new Array(texts.length);
+      const misses = [];
+      for (let i = 0; i < texts.length; i++) {
+        const hash = Bun.hash(texts[i]).toString(36);
+        const cached = cache.get(hash);
+        if (cached) {
+          results[i] = cached;
+        } else {
+          results[i] = null;
+          misses.push({ index: i, text: texts[i] });
+        }
+      }
+      if (misses.length === 0) {
+        return results;
+      }
+      const missTexts = misses.map((m) => m.text);
+      const embeddings = yield* underlying(missTexts, model);
+      for (let j = 0; j < misses.length; j++) {
+        const { index: index2, text } = misses[j];
+        const embedding = embeddings[j];
+        const hash = Bun.hash(text).toString(36);
+        cache.set(hash, embedding);
+        results[index2] = embedding;
+      }
+      evictIfNeeded(cache);
+      return results;
+    }),
+    size: () => {
+      let total = 0;
+      for (const c of caches.values()) total += c.size;
+      return total;
+    },
+    clear: () => caches.clear()
+  };
+};
+// src/circuit-breaker.ts
+import { Effect as Effect11 } from "effect";
+var makeCircuitBreaker = (config = {}) => {
+  const { failureThreshold, cooldownMs } = {
+    ...defaultCircuitBreakerConfig,
+    ...config
+  };
+  let currentState = "closed";
+  let consecutiveFailures = 0;
+  let openedAt = 0;
+  const onSuccess = () => {
+    consecutiveFailures = 0;
+    currentState = "closed";
+  };
+  const onFailure = () => {
+    consecutiveFailures++;
+    if (consecutiveFailures >= failureThreshold) {
+      currentState = "open";
+      openedAt = Date.now();
+    }
+  };
+  return {
+    protect: (effect) => Effect11.gen(function* () {
+      if (currentState === "open") {
+        if (Date.now() - openedAt >= cooldownMs) {
+          currentState = "half_open";
+        } else {
+          return yield* Effect11.fail(
+            new LLMError({
+              message: `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`,
+              provider: "custom",
+              cause: void 0
+            })
+          );
+        }
+      }
+      const result = yield* Effect11.exit(effect);
+      if (result._tag === "Success") {
+        onSuccess();
+        return result.value;
+      }
+      onFailure();
+      return yield* Effect11.failCause(result.cause);
+    }),
+    state: () => currentState,
+    reset: () => {
+      currentState = "closed";
+      consecutiveFailures = 0;
+      openedAt = 0;
+    }
+  };
+};
+// src/runtime.ts
+var EmbeddingCacheLayer = Layer9.effect(
+  LLMService,
+  Effect12.gen(function* () {
+    const llm = yield* LLMService;
+    const cache = makeEmbeddingCache(llm.embed);
+    return LLMService.of({ ...llm, embed: cache.embed });
+  })
+);
+var makeCircuitBreakerLayer = (config) => Layer9.effect(
+  LLMService,
+  Effect12.gen(function* () {
+    const llm = yield* LLMService;
+    const breaker = makeCircuitBreaker(config);
+    return LLMService.of({
+      ...llm,
+      complete: (req) => breaker.protect(llm.complete(req)),
+      stream: (req) => breaker.protect(llm.stream(req))
+    });
+  })
+);
+var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
   if (provider === "test") {
     return Layer9.mergeAll(
       TestLLMServiceLayer(testResponses ?? {}),
@@ -3390,16 +3559,21 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
   if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
   const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
   const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
-  return Layer9.mergeAll(
-    providerLayer.pipe(Layer9.provide(configLayer)),
-    PromptManagerLive
-  );
+  const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
+  let llmLayer = EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer));
+  if (circuitBreaker) {
+    llmLayer = EmbeddingCacheLayer.pipe(
+      Layer9.provide(makeCircuitBreakerLayer(circuitBreaker).pipe(Layer9.provide(baseProviderLayer)))
+    );
+  }
+  return Layer9.mergeAll(llmLayer, PromptManagerLive);
 };
 var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
   const configLayer = Layer9.succeed(LLMConfig, config);
   const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
+  const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
   return Layer9.mergeAll(
-    providerLayer.pipe(Layer9.provide(configLayer)),
+    EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer)),
     PromptManagerLive
   );
 };
@@ -3447,10 +3621,13 @@ export {
   calculateCost,
   createLLMProviderLayer,
   createLLMProviderLayerWithConfig,
+  defaultCircuitBreakerConfig,
   estimateTokenCount,
   getProviderDefaultModel,
   llmConfigFromEnv,
   makeCacheable,
+  makeCircuitBreaker,
+  makeEmbeddingCache,
   retryPolicy
 };
 //# sourceMappingURL=index.js.map