@reactive-agents/llm-provider 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,7 @@ Provides a unified `LLMService` interface with adapters for Anthropic, OpenAI, G
7
7
  ## Installation
8
8
 
9
9
  ```bash
10
- bun add @reactive-agents/llm-provider effect
10
+ bun add @reactive-agents/llm-provider
11
11
  ```
12
12
 
13
13
  Install the SDK for your chosen provider:
@@ -20,20 +20,23 @@ bun add @google/genai # Google Gemini
20
20
 
21
21
  ## Supported Providers
22
22
 
23
- | Provider | Models | Streaming | Embeddings | Structured Output |
24
- |----------|--------|-----------|------------|------------------|
25
- | `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓ | — | ✓ |
26
- | `openai` | gpt-4o, gpt-4o-mini, o1-* | ✓ | ✓ | ✓ |
27
- | `gemini` | gemini-2.0-flash, gemini-2.5-pro | ✓ | ✓ | ✓ |
28
- | `ollama` | any local model | ✓ | ✓ | ✓ |
29
- | `test` | deterministic mock | ✓ | ✓ | — |
23
+ | Provider | Models | Streaming | Embeddings | Structured Output |
24
+ | ----------- | ---------------------------------------- | --------- | ---------- | ----------------- |
25
+ | `anthropic` | claude-haiku, claude-sonnet, claude-opus | ✓ | — | ✓ |
26
+ | `openai` | gpt-4o, gpt-4o-mini, o1-\* | ✓ | ✓ | ✓ |
27
+ | `gemini` | gemini-2.0-flash, gemini-2.5-pro | ✓ | ✓ | ✓ |
28
+ | `ollama` | any local model | ✓ | ✓ | ✓ |
29
+ | `test` | deterministic mock | ✓ | ✓ | — |
30
30
 
31
31
  ## Usage
32
32
 
33
33
  ### Anthropic
34
34
 
35
35
  ```typescript
36
- import { createLLMProviderLayer, LLMService } from "@reactive-agents/llm-provider";
36
+ import {
37
+ createLLMProviderLayer,
38
+ LLMService,
39
+ } from "@reactive-agents/llm-provider";
37
40
  import { Effect } from "effect";
38
41
 
39
42
  const layer = createLLMProviderLayer("anthropic");
@@ -51,7 +54,10 @@ const result = await Effect.runPromise(
51
54
  ### Google Gemini
52
55
 
53
56
  ```typescript
54
- import { createLLMProviderLayer, LLMService } from "@reactive-agents/llm-provider";
57
+ import {
58
+ createLLMProviderLayer,
59
+ LLMService,
60
+ } from "@reactive-agents/llm-provider";
55
61
  import { Effect } from "effect";
56
62
 
57
63
  // Set GOOGLE_API_KEY in your environment
package/dist/index.d.ts CHANGED
@@ -910,7 +910,7 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
910
910
  * Anthropic has no embeddings API — routes to OpenAI or Ollama
911
911
  * per LLMConfig.embeddingConfig.
912
912
  */
913
- readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly number[][], LLMErrors>;
913
+ readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
914
914
  /**
915
915
  * Count tokens for a set of messages.
916
916
  * Used for context window management.
@@ -1306,7 +1306,7 @@ declare const TestLLMServiceLayer: (responses?: Record<string, string>) => Layer
1306
1306
 
1307
1307
  /**
1308
1308
  * Estimate token count for messages.
1309
- * Uses a simple heuristic: ~4 characters per token for English text.
1309
+ * Uses content-aware heuristics: ~3 chars/token for code/JSON, ~4 for English text.
1310
1310
  * This is used as a fallback when the provider's token counting API is unavailable.
1311
1311
  */
1312
1312
  declare const estimateTokenCount: (messages: readonly LLMMessage[]) => Effect.Effect<number, never>;
@@ -1321,6 +1321,12 @@ declare const calculateCost: (inputTokens: number, outputTokens: number, model:
1321
1321
  * Only retries on rate limit and timeout errors.
1322
1322
  */
1323
1323
  declare const retryPolicy: Schedule.Schedule<[number, effect_Duration.Duration], LLMErrors, never>;
1324
+ type CircuitBreakerConfig = {
1325
+ readonly failureThreshold: number;
1326
+ readonly cooldownMs: number;
1327
+ readonly halfOpenRequests: number;
1328
+ };
1329
+ declare const defaultCircuitBreakerConfig: CircuitBreakerConfig;
1324
1330
 
1325
1331
  /**
1326
1332
  * Schema for ReAct action parsing.
@@ -1420,10 +1426,54 @@ declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "olla
1420
1426
  thinking?: boolean;
1421
1427
  temperature?: number;
1422
1428
  maxTokens?: number;
1423
- }) => Layer.Layer<LLMService | PromptManager, never, never>;
1429
+ }, circuitBreaker?: Partial<CircuitBreakerConfig>) => Layer.Layer<LLMService | PromptManager, never, never>;
1424
1430
  /**
1425
1431
  * LLM layer with custom config (for programmatic use).
1426
1432
  */
1427
1433
  declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
1428
1434
 
1429
- export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };
1435
+ /**
1436
+ * Content-hash embedding cache — deduplicates embed() calls per text.
1437
+ * Cache is keyed by Bun.hash(text) and avoids re-embedding identical strings.
1438
+ */
1439
+
1440
+ interface EmbeddingCache {
1441
+ /** Wrap an embed function with content-hash deduplication. */
1442
+ readonly embed: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>;
1443
+ /** Number of cached embeddings. */
1444
+ readonly size: () => number;
1445
+ /** Clear all cached entries. */
1446
+ readonly clear: () => void;
1447
+ }
1448
+ /**
1449
+ * Create an embedding cache that wraps an underlying embed function.
1450
+ * Each text is hashed individually; only cache-misses are sent to the LLM.
1451
+ */
1452
+ declare const makeEmbeddingCache: (underlying: (texts: readonly string[], model?: string) => Effect.Effect<readonly (readonly number[])[], LLMErrors>) => EmbeddingCache;
1453
+
1454
+ /**
1455
+ * Circuit Breaker — prevents cascading failures by fast-failing when
1456
+ * the underlying LLM provider is consistently erroring.
1457
+ *
1458
+ * States: CLOSED (normal) → OPEN (fast-fail) → HALF_OPEN (test one request)
1459
+ */
1460
+
1461
+ type State = "closed" | "open" | "half_open";
1462
+ interface CircuitBreaker {
1463
+ /** Wrap an Effect with circuit breaker protection. */
1464
+ readonly protect: <A>(effect: Effect.Effect<A, LLMErrors>) => Effect.Effect<A, LLMErrors>;
1465
+ /** Current state. */
1466
+ readonly state: () => State;
1467
+ /** Reset to closed. */
1468
+ readonly reset: () => void;
1469
+ }
1470
+ /**
1471
+ * Create a circuit breaker with configurable thresholds.
1472
+ *
1473
+ * - After `failureThreshold` consecutive failures → OPEN (fast-fail).
1474
+ * - After `cooldownMs` → HALF_OPEN (allow one test request).
1475
+ * - If test request succeeds → CLOSED. If it fails → OPEN again.
1476
+ */
1477
+ declare const makeCircuitBreaker: (config?: Partial<CircuitBreakerConfig>) => CircuitBreaker;
1478
+
1479
+ export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CircuitBreaker, type CircuitBreakerConfig, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingCache, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, defaultCircuitBreakerConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, makeCircuitBreaker, makeEmbeddingCache, retryPolicy };
package/dist/index.js CHANGED
@@ -1432,25 +1432,36 @@ import { Effect as Effect3, Context as Context3, Layer as Layer2 } from "effect"
1432
1432
 
1433
1433
  // src/token-counter.ts
1434
1434
  import { Effect as Effect2 } from "effect";
1435
+ function charsPerToken(text) {
1436
+ if (text.length === 0) return 4;
1437
+ const sample = text.slice(0, 2e3);
1438
+ const codeSignals = (sample.match(/[{}();=<>\[\]]/g) ?? []).length;
1439
+ const jsonSignals = (sample.match(/"\w+"\s*:/g) ?? []).length;
1440
+ const ratio = (codeSignals + jsonSignals) / sample.length;
1441
+ if (ratio > 0.08) return 3;
1442
+ if (ratio > 0.04) return 3.5;
1443
+ return 4;
1444
+ }
1435
1445
  var estimateTokenCount = (messages) => Effect2.sync(() => {
1436
- let totalChars = 0;
1446
+ let totalTokens = 0;
1437
1447
  for (const msg of messages) {
1438
1448
  if (typeof msg.content === "string") {
1439
- totalChars += msg.content.length;
1449
+ totalTokens += Math.ceil(msg.content.length / charsPerToken(msg.content));
1440
1450
  } else {
1441
1451
  for (const block of msg.content) {
1442
1452
  if (block.type === "text") {
1443
- totalChars += block.text.length;
1453
+ totalTokens += Math.ceil(block.text.length / charsPerToken(block.text));
1444
1454
  } else if (block.type === "tool_result") {
1445
- totalChars += block.content.length;
1455
+ totalTokens += Math.ceil(block.content.length / charsPerToken(block.content));
1446
1456
  } else if (block.type === "tool_use") {
1447
- totalChars += JSON.stringify(block.input).length;
1457
+ const json = JSON.stringify(block.input);
1458
+ totalTokens += Math.ceil(json.length / 3);
1448
1459
  }
1449
1460
  }
1450
1461
  }
1451
- totalChars += 16;
1462
+ totalTokens += 4;
1452
1463
  }
1453
- return Math.ceil(totalChars / 4);
1464
+ return totalTokens;
1454
1465
  });
1455
1466
  var calculateCost = (inputTokens, outputTokens, model) => {
1456
1467
  const costMap = {
@@ -1578,6 +1589,11 @@ var retryPolicy = Schedule.intersect(
1578
1589
  (error) => error._tag === "LLMRateLimitError" || error._tag === "LLMTimeoutError"
1579
1590
  )
1580
1591
  );
1592
+ var defaultCircuitBreakerConfig = {
1593
+ failureThreshold: 5,
1594
+ cooldownMs: 3e4,
1595
+ halfOpenRequests: 1
1596
+ };
1581
1597
 
1582
1598
  // src/providers/anthropic.ts
1583
1599
  var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "system").map((m) => {
@@ -1622,6 +1638,16 @@ var toEffectError = (error, provider) => {
1622
1638
  cause: error
1623
1639
  });
1624
1640
  };
1641
+ var MIN_SYSTEM_CACHE_CHARS = 4096;
1642
+ var buildSystemParam = (systemPrompt) => {
1643
+ if (!systemPrompt) return void 0;
1644
+ if (systemPrompt.length < MIN_SYSTEM_CACHE_CHARS) return systemPrompt;
1645
+ return [{
1646
+ type: "text",
1647
+ text: systemPrompt,
1648
+ cache_control: { type: "ephemeral" }
1649
+ }];
1650
+ };
1625
1651
  var AnthropicProviderLive = Layer3.effect(
1626
1652
  LLMService,
1627
1653
  Effect4.gen(function* () {
@@ -1644,7 +1670,7 @@ var AnthropicProviderLive = Layer3.effect(
1644
1670
  model,
1645
1671
  max_tokens: request.maxTokens ?? config.defaultMaxTokens,
1646
1672
  temperature: request.temperature ?? config.defaultTemperature,
1647
- system: request.systemPrompt,
1673
+ system: buildSystemParam(request.systemPrompt),
1648
1674
  messages: toAnthropicMessages(request.messages),
1649
1675
  stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
1650
1676
  tools: request.tools?.map(toAnthropicTool)
@@ -1674,7 +1700,7 @@ var AnthropicProviderLive = Layer3.effect(
1674
1700
  model,
1675
1701
  max_tokens: request.maxTokens ?? config.defaultMaxTokens,
1676
1702
  temperature: request.temperature ?? config.defaultTemperature,
1677
- system: request.systemPrompt,
1703
+ system: buildSystemParam(request.systemPrompt),
1678
1704
  messages: toAnthropicMessages(request.messages)
1679
1705
  });
1680
1706
  stream.on("text", (text) => {
@@ -1714,17 +1740,13 @@ var AnthropicProviderLive = Layer3.effect(
1714
1740
  });
1715
1741
  }),
1716
1742
  completeStructured: (request) => Effect4.gen(function* () {
1717
- const schemaStr = JSON.stringify(
1718
- Schema2.encodedSchema(request.outputSchema),
1719
- null,
1720
- 2
1721
- );
1743
+ const jsonSchema = Schema2.encodedSchema(request.outputSchema);
1744
+ const schemaStr = JSON.stringify(jsonSchema, null, 2);
1722
1745
  const messagesWithFormat = [
1723
1746
  ...request.messages,
1724
1747
  {
1725
1748
  role: "user",
1726
- content: `
1727
- Respond with ONLY valid JSON matching this schema:
1749
+ content: `Respond with ONLY valid JSON matching this schema:
1728
1750
  ${schemaStr}
1729
1751
 
1730
1752
  No markdown, no code fences, just raw JSON.`
@@ -1741,9 +1763,11 @@ No markdown, no code fences, just raw JSON.`
1741
1763
  },
1742
1764
  {
1743
1765
  role: "user",
1744
- content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
1766
+ content: `That response did not match the schema. Error: ${String(lastError)}. Please try again with valid JSON only.`
1745
1767
  }
1746
1768
  ];
1769
+ const anthropicMsgs = toAnthropicMessages(msgs);
1770
+ anthropicMsgs.push({ role: "assistant", content: "{" });
1747
1771
  const completeResult = yield* Effect4.tryPromise({
1748
1772
  try: () => {
1749
1773
  const client = getClient();
@@ -1751,8 +1775,8 @@ No markdown, no code fences, just raw JSON.`
1751
1775
  model: typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel,
1752
1776
  max_tokens: request.maxTokens ?? config.defaultMaxTokens,
1753
1777
  temperature: request.temperature ?? config.defaultTemperature,
1754
- system: request.systemPrompt,
1755
- messages: toAnthropicMessages(msgs)
1778
+ system: buildSystemParam(request.systemPrompt),
1779
+ messages: anthropicMsgs
1756
1780
  });
1757
1781
  },
1758
1782
  catch: (error) => toEffectError(error, "anthropic")
@@ -1761,8 +1785,9 @@ No markdown, no code fences, just raw JSON.`
1761
1785
  completeResult,
1762
1786
  typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel
1763
1787
  );
1788
+ const fullContent = "{" + response.content;
1764
1789
  try {
1765
- const parsed = JSON.parse(response.content);
1790
+ const parsed = JSON.parse(fullContent);
1766
1791
  const decoded = Schema2.decodeUnknownEither(
1767
1792
  request.outputSchema
1768
1793
  )(parsed);
@@ -2031,49 +2056,56 @@ var OpenAIProviderLive = Layer4.effect(
2031
2056
  });
2032
2057
  }),
2033
2058
  completeStructured: (request) => Effect5.gen(function* () {
2034
- const schemaStr = JSON.stringify(
2035
- Schema3.encodedSchema(request.outputSchema),
2036
- null,
2037
- 2
2038
- );
2039
- const messagesWithFormat = [
2059
+ const jsonSchema = Schema3.encodedSchema(request.outputSchema);
2060
+ const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
2061
+ const schemaStr = JSON.stringify(schemaObj, null, 2);
2062
+ const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
2063
+ const client = getClient();
2064
+ const maxRetries = request.maxParseRetries ?? 2;
2065
+ const requestBody = {
2066
+ model,
2067
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
2068
+ temperature: request.temperature ?? config.defaultTemperature,
2069
+ response_format: {
2070
+ type: "json_schema",
2071
+ json_schema: {
2072
+ name: "structured_output",
2073
+ strict: true,
2074
+ schema: schemaObj
2075
+ }
2076
+ }
2077
+ };
2078
+ const messages = [
2040
2079
  ...request.messages,
2041
2080
  {
2042
2081
  role: "user",
2043
- content: `
2044
- Respond with ONLY valid JSON matching this schema:
2045
- ${schemaStr}
2046
-
2047
- No markdown, no code fences, just raw JSON.`
2082
+ content: `Respond with JSON matching this schema:
2083
+ ${schemaStr}`
2048
2084
  }
2049
2085
  ];
2050
2086
  let lastError = null;
2051
- const maxRetries = request.maxParseRetries ?? 2;
2052
2087
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
2053
- const msgs = attempt === 0 ? messagesWithFormat : [
2054
- ...messagesWithFormat,
2088
+ const msgs = attempt === 0 ? messages : [
2089
+ ...messages,
2055
2090
  {
2056
2091
  role: "assistant",
2057
2092
  content: String(lastError)
2058
2093
  },
2059
2094
  {
2060
2095
  role: "user",
2061
- content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
2096
+ content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
2062
2097
  }
2063
2098
  ];
2064
- const client = getClient();
2065
2099
  const completeResult = yield* Effect5.tryPromise({
2066
2100
  try: () => client.chat.completions.create({
2067
- model: typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel,
2068
- max_tokens: request.maxTokens ?? config.defaultMaxTokens,
2069
- temperature: request.temperature ?? config.defaultTemperature,
2101
+ ...requestBody,
2070
2102
  messages: toOpenAIMessages(msgs)
2071
2103
  }),
2072
2104
  catch: (error) => toEffectError2(error, "openai")
2073
2105
  });
2074
2106
  const response = mapOpenAIResponse(
2075
2107
  completeResult,
2076
- typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel
2108
+ model
2077
2109
  );
2078
2110
  try {
2079
2111
  const parsed = JSON.parse(response.content);
@@ -2423,11 +2455,10 @@ var LocalProviderLive = Layer5.effect(
2423
2455
  });
2424
2456
  }),
2425
2457
  completeStructured: (request) => Effect6.gen(function* () {
2426
- const schemaStr = JSON.stringify(
2427
- Schema4.encodedSchema(request.outputSchema),
2428
- null,
2429
- 2
2430
- );
2458
+ const encodedSchema = Schema4.encodedSchema(request.outputSchema);
2459
+ const schemaObj = JSON.parse(JSON.stringify(encodedSchema));
2460
+ const schemaStr = JSON.stringify(schemaObj, null, 2);
2461
+ const ollamaFormat = schemaObj && typeof schemaObj === "object" && schemaObj.properties ? schemaObj : "json";
2431
2462
  const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
2432
2463
  let lastError = null;
2433
2464
  const maxRetries = request.maxParseRetries ?? 2;
@@ -2473,7 +2504,7 @@ No markdown, no code fences, just raw JSON.`
2473
2504
  model,
2474
2505
  messages: msgs,
2475
2506
  stream: false,
2476
- format: "json",
2507
+ format: ollamaFormat,
2477
2508
  keep_alive: "5m",
2478
2509
  options: {
2479
2510
  temperature: request.temperature ?? config.defaultTemperature,
@@ -2529,7 +2560,7 @@ No markdown, no code fences, just raw JSON.`
2529
2560
  }),
2530
2561
  getStructuredOutputCapabilities: () => Effect6.succeed({
2531
2562
  nativeJsonMode: true,
2532
- jsonSchemaEnforcement: false,
2563
+ jsonSchemaEnforcement: true,
2533
2564
  prefillSupport: false,
2534
2565
  grammarConstraints: true
2535
2566
  })
@@ -2656,6 +2687,8 @@ var GeminiProviderLive = Layer6.effect(
2656
2687
  if (opts.tools?.length) {
2657
2688
  cfg.tools = toGeminiTools([...opts.tools]);
2658
2689
  }
2690
+ if (opts.responseMimeType) cfg.responseMimeType = opts.responseMimeType;
2691
+ if (opts.responseSchema) cfg.responseSchema = opts.responseSchema;
2659
2692
  return cfg;
2660
2693
  };
2661
2694
  return LLMService.of({
@@ -2754,20 +2787,20 @@ var GeminiProviderLive = Layer6.effect(
2754
2787
  });
2755
2788
  }),
2756
2789
  completeStructured: (request) => Effect7.gen(function* () {
2757
- const schemaStr = JSON.stringify(
2758
- Schema5.encodedSchema(request.outputSchema),
2759
- null,
2760
- 2
2761
- );
2790
+ const jsonSchema = Schema5.encodedSchema(request.outputSchema);
2791
+ const schemaObj = JSON.parse(JSON.stringify(jsonSchema));
2792
+ const schemaStr = JSON.stringify(schemaObj, null, 2);
2793
+ const client = yield* Effect7.promise(() => getClient());
2794
+ let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
2795
+ if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
2796
+ model = GEMINI_DEFAULT_MODEL;
2797
+ }
2762
2798
  const messagesWithFormat = [
2763
2799
  ...request.messages,
2764
2800
  {
2765
2801
  role: "user",
2766
- content: `
2767
- Respond with ONLY valid JSON matching this schema:
2768
- ${schemaStr}
2769
-
2770
- No markdown, no code fences, just raw JSON.`
2802
+ content: `Respond with JSON matching this schema:
2803
+ ${schemaStr}`
2771
2804
  }
2772
2805
  ];
2773
2806
  let lastError = null;
@@ -2781,14 +2814,9 @@ No markdown, no code fences, just raw JSON.`
2781
2814
  },
2782
2815
  {
2783
2816
  role: "user",
2784
- content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
2817
+ content: `That response did not match the schema. Error: ${String(lastError)}. Please try again.`
2785
2818
  }
2786
2819
  ];
2787
- const client = yield* Effect7.promise(() => getClient());
2788
- let model = typeof request.model === "string" ? request.model : request.model?.model ?? config.defaultModel;
2789
- if (!model || model.startsWith("claude") || model.startsWith("gpt-")) {
2790
- model = GEMINI_DEFAULT_MODEL;
2791
- }
2792
2820
  const response = yield* Effect7.tryPromise({
2793
2821
  try: () => client.models.generateContent({
2794
2822
  model,
@@ -2796,7 +2824,9 @@ No markdown, no code fences, just raw JSON.`
2796
2824
  config: buildGeminiConfig({
2797
2825
  maxTokens: request.maxTokens,
2798
2826
  temperature: request.temperature,
2799
- systemPrompt: request.systemPrompt
2827
+ systemPrompt: request.systemPrompt,
2828
+ responseMimeType: "application/json",
2829
+ responseSchema: schemaObj
2800
2830
  })
2801
2831
  }),
2802
2832
  catch: toEffectError3
@@ -3375,8 +3405,147 @@ var ComplexityAnalysisSchema = Schema8.Struct({
3375
3405
  });
3376
3406
 
3377
3407
  // src/runtime.ts
3378
- import { Layer as Layer9 } from "effect";
3379
- var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams) => {
3408
+ import { Effect as Effect12, Layer as Layer9 } from "effect";
3409
+
3410
+ // src/embedding-cache.ts
3411
+ import { Effect as Effect10 } from "effect";
3412
+ var MAX_ENTRIES = 5e3;
3413
+ var makeEmbeddingCache = (underlying) => {
3414
+ const caches = /* @__PURE__ */ new Map();
3415
+ const getModelCache = (model) => {
3416
+ let c = caches.get(model);
3417
+ if (!c) {
3418
+ c = /* @__PURE__ */ new Map();
3419
+ caches.set(model, c);
3420
+ }
3421
+ return c;
3422
+ };
3423
+ const evictIfNeeded = (cache) => {
3424
+ if (cache.size > MAX_ENTRIES) {
3425
+ const evictCount = Math.floor(MAX_ENTRIES * 0.2);
3426
+ const keys = cache.keys();
3427
+ for (let i = 0; i < evictCount; i++) {
3428
+ const next = keys.next();
3429
+ if (next.done) break;
3430
+ cache.delete(next.value);
3431
+ }
3432
+ }
3433
+ };
3434
+ return {
3435
+ embed: (texts, model) => Effect10.gen(function* () {
3436
+ const modelKey = model ?? "__default__";
3437
+ const cache = getModelCache(modelKey);
3438
+ const results = new Array(texts.length);
3439
+ const misses = [];
3440
+ for (let i = 0; i < texts.length; i++) {
3441
+ const hash = Bun.hash(texts[i]).toString(36);
3442
+ const cached = cache.get(hash);
3443
+ if (cached) {
3444
+ results[i] = cached;
3445
+ } else {
3446
+ results[i] = null;
3447
+ misses.push({ index: i, text: texts[i] });
3448
+ }
3449
+ }
3450
+ if (misses.length === 0) {
3451
+ return results;
3452
+ }
3453
+ const missTexts = misses.map((m) => m.text);
3454
+ const embeddings = yield* underlying(missTexts, model);
3455
+ for (let j = 0; j < misses.length; j++) {
3456
+ const { index: index2, text } = misses[j];
3457
+ const embedding = embeddings[j];
3458
+ const hash = Bun.hash(text).toString(36);
3459
+ cache.set(hash, embedding);
3460
+ results[index2] = embedding;
3461
+ }
3462
+ evictIfNeeded(cache);
3463
+ return results;
3464
+ }),
3465
+ size: () => {
3466
+ let total = 0;
3467
+ for (const c of caches.values()) total += c.size;
3468
+ return total;
3469
+ },
3470
+ clear: () => caches.clear()
3471
+ };
3472
+ };
3473
+
3474
+ // src/circuit-breaker.ts
3475
+ import { Effect as Effect11 } from "effect";
3476
+ var makeCircuitBreaker = (config = {}) => {
3477
+ const { failureThreshold, cooldownMs } = {
3478
+ ...defaultCircuitBreakerConfig,
3479
+ ...config
3480
+ };
3481
+ let currentState = "closed";
3482
+ let consecutiveFailures = 0;
3483
+ let openedAt = 0;
3484
+ const onSuccess = () => {
3485
+ consecutiveFailures = 0;
3486
+ currentState = "closed";
3487
+ };
3488
+ const onFailure = () => {
3489
+ consecutiveFailures++;
3490
+ if (consecutiveFailures >= failureThreshold) {
3491
+ currentState = "open";
3492
+ openedAt = Date.now();
3493
+ }
3494
+ };
3495
+ return {
3496
+ protect: (effect) => Effect11.gen(function* () {
3497
+ if (currentState === "open") {
3498
+ if (Date.now() - openedAt >= cooldownMs) {
3499
+ currentState = "half_open";
3500
+ } else {
3501
+ return yield* Effect11.fail(
3502
+ new LLMError({
3503
+ message: `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`,
3504
+ provider: "custom",
3505
+ cause: void 0
3506
+ })
3507
+ );
3508
+ }
3509
+ }
3510
+ const result = yield* Effect11.exit(effect);
3511
+ if (result._tag === "Success") {
3512
+ onSuccess();
3513
+ return result.value;
3514
+ }
3515
+ onFailure();
3516
+ return yield* Effect11.failCause(result.cause);
3517
+ }),
3518
+ state: () => currentState,
3519
+ reset: () => {
3520
+ currentState = "closed";
3521
+ consecutiveFailures = 0;
3522
+ openedAt = 0;
3523
+ }
3524
+ };
3525
+ };
3526
+
3527
+ // src/runtime.ts
3528
+ var EmbeddingCacheLayer = Layer9.effect(
3529
+ LLMService,
3530
+ Effect12.gen(function* () {
3531
+ const llm = yield* LLMService;
3532
+ const cache = makeEmbeddingCache(llm.embed);
3533
+ return LLMService.of({ ...llm, embed: cache.embed });
3534
+ })
3535
+ );
3536
+ var makeCircuitBreakerLayer = (config) => Layer9.effect(
3537
+ LLMService,
3538
+ Effect12.gen(function* () {
3539
+ const llm = yield* LLMService;
3540
+ const breaker = makeCircuitBreaker(config);
3541
+ return LLMService.of({
3542
+ ...llm,
3543
+ complete: (req) => breaker.protect(llm.complete(req)),
3544
+ stream: (req) => breaker.protect(llm.stream(req))
3545
+ });
3546
+ })
3547
+ );
3548
+ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
3380
3549
  if (provider === "test") {
3381
3550
  return Layer9.mergeAll(
3382
3551
  TestLLMServiceLayer(testResponses ?? {}),
@@ -3390,16 +3559,21 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
3390
3559
  if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
3391
3560
  const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
3392
3561
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
3393
- return Layer9.mergeAll(
3394
- providerLayer.pipe(Layer9.provide(configLayer)),
3395
- PromptManagerLive
3396
- );
3562
+ const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
3563
+ let llmLayer = EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer));
3564
+ if (circuitBreaker) {
3565
+ llmLayer = EmbeddingCacheLayer.pipe(
3566
+ Layer9.provide(makeCircuitBreakerLayer(circuitBreaker).pipe(Layer9.provide(baseProviderLayer)))
3567
+ );
3568
+ }
3569
+ return Layer9.mergeAll(llmLayer, PromptManagerLive);
3397
3570
  };
3398
3571
  var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
3399
3572
  const configLayer = Layer9.succeed(LLMConfig, config);
3400
3573
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
3574
+ const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
3401
3575
  return Layer9.mergeAll(
3402
- providerLayer.pipe(Layer9.provide(configLayer)),
3576
+ EmbeddingCacheLayer.pipe(Layer9.provide(baseProviderLayer)),
3403
3577
  PromptManagerLive
3404
3578
  );
3405
3579
  };
@@ -3447,10 +3621,13 @@ export {
3447
3621
  calculateCost,
3448
3622
  createLLMProviderLayer,
3449
3623
  createLLMProviderLayerWithConfig,
3624
+ defaultCircuitBreakerConfig,
3450
3625
  estimateTokenCount,
3451
3626
  getProviderDefaultModel,
3452
3627
  llmConfigFromEnv,
3453
3628
  makeCacheable,
3629
+ makeCircuitBreaker,
3630
+ makeEmbeddingCache,
3454
3631
  retryPolicy
3455
3632
  };
3456
3633
  //# sourceMappingURL=index.js.map