npm - @reactive-agents/llm-provider - Versions diffs - 0.7.8 → 0.9.0 - Mend

@reactive-agents/llm-provider 0.7.8 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -1120,6 +1120,14 @@ var init_dist = __esm({
   }
 });
+// src/capabilities.ts
+var DEFAULT_CAPABILITIES = {
+  supportsToolCalling: false,
+  supportsStreaming: true,
+  supportsStructuredOutput: false,
+  supportsLogprobs: false
+};
 // src/types.ts
 import { Schema } from "effect";
 var LLMProviderType = Schema.Literal(
@@ -1378,7 +1386,24 @@ var CompletionResponseSchema = Schema.Struct({
   /** Tool calls emitted by the model (if any) */
   toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
   /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
-  thinking: Schema.optional(Schema.String)
+  thinking: Schema.optional(Schema.String),
+  /** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
+  logprobs: Schema.optional(
+    Schema.Array(
+      Schema.Struct({
+        token: Schema.String,
+        logprob: Schema.Number,
+        topLogprobs: Schema.optional(
+          Schema.Array(
+            Schema.Struct({
+              token: Schema.String,
+              logprob: Schema.Number
+            })
+          )
+        )
+      })
+    )
+  )
 });
 // src/errors.ts
@@ -1418,12 +1443,16 @@ var llmConfigFromEnv = LLMConfig.of({
     provider: process.env.EMBEDDING_PROVIDER ?? "openai",
     batchSize: 100
   },
-  supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514").startsWith("claude"),
+  supportsPromptCaching: (() => {
+    const m = process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514";
+    return m.startsWith("claude") || m.startsWith("gemini") || m.startsWith("gpt");
+  })(),
   maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
   timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
   defaultMaxTokens: 4096,
   defaultTemperature: Number(process.env.LLM_DEFAULT_TEMPERATURE ?? 0.7),
-  observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full"
+  observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full",
+  pricingRegistry: {}
 });
 var LLMConfigFromEnv = Layer.succeed(LLMConfig, llmConfigFromEnv);
@@ -1463,20 +1492,84 @@ var estimateTokenCount = (messages) => Effect2.sync(() => {
   }
   return totalTokens;
 });
-var calculateCost = (inputTokens, outputTokens, model) => {
+function getPricing(model, registry, pricing) {
+  if (pricing?.input !== void 0 && pricing?.output !== void 0) {
+    return { input: pricing.input, output: pricing.output };
+  }
+  if (registry && registry[model]) return registry[model];
   const costMap = {
-    "claude-3-5-haiku-20241022": { input: 1, output: 5 },
+    // ── Anthropic ──
+    "claude-3-5-haiku-20241022": { input: 0.8, output: 4 },
+    "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
     "claude-sonnet-4-20250514": { input: 3, output: 15 },
     "claude-sonnet-4-5-20250929": { input: 3, output: 15 },
     "claude-opus-4-20250514": { input: 15, output: 75 },
-    "gpt-4o-mini": { input: 0.15, output: 0.6 },
+    "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
+    "claude-3-5-sonnet-20240620": { input: 3, output: 15 },
+    "claude-3-opus-20240229": { input: 15, output: 75 },
+    "claude-3-sonnet-20240229": { input: 3, output: 15 },
+    // ── OpenAI ──
     "gpt-4o": { input: 2.5, output: 10 },
+    "gpt-4o-2024-11-20": { input: 2.5, output: 10 },
+    "gpt-4o-2024-08-06": { input: 2.5, output: 10 },
+    "gpt-4o-2024-05-13": { input: 5, output: 15 },
+    "gpt-4o-mini": { input: 0.15, output: 0.6 },
+    "gpt-4o-mini-2024-07-18": { input: 0.15, output: 0.6 },
+    "gpt-4-turbo": { input: 10, output: 30 },
+    "gpt-4-turbo-2024-04-09": { input: 10, output: 30 },
+    "gpt-4": { input: 30, output: 60 },
+    "gpt-4-0613": { input: 30, output: 60 },
+    "gpt-3.5-turbo": { input: 0.5, output: 1.5 },
+    "o1": { input: 15, output: 60 },
+    "o1-mini": { input: 3, output: 12 },
+    "o1-preview": { input: 15, output: 60 },
+    "o3": { input: 10, output: 40 },
+    "o3-mini": { input: 1.1, output: 4.4 },
+    "o4-mini": { input: 1.1, output: 4.4 },
+    // ── Google Gemini ──
     "gemini-2.0-flash": { input: 0.1, output: 0.4 },
+    "gemini-2.5-flash": { input: 0.15, output: 0.6 },
+    "gemini-2.5-flash-preview-05-20": { input: 0.15, output: 0.6 },
+    "gemini-2.5-pro": { input: 1.25, output: 10 },
     "gemini-2.5-pro-preview-03-25": { input: 1.25, output: 10 },
-    "gemini-embedding-001": { input: 0, output: 0 }
+    "gemini-2.5-pro-preview-05-06": { input: 1.25, output: 10 },
+    "gemini-1.5-pro": { input: 1.25, output: 5 },
+    "gemini-1.5-flash": { input: 0.075, output: 0.3 },
+    "gemini-embedding-001": { input: 0, output: 0 },
+    // ── Meta Llama (via LiteLLM / cloud providers) ──
+    "llama-3.1-405b": { input: 3, output: 3 },
+    "llama-3.1-70b": { input: 0.88, output: 0.88 },
+    "llama-3.1-8b": { input: 0.18, output: 0.18 },
+    "llama-3.3-70b": { input: 0.88, output: 0.88 },
+    // ── Mistral ──
+    "mistral-large-latest": { input: 2, output: 6 },
+    "mistral-small-latest": { input: 0.2, output: 0.6 },
+    "codestral-latest": { input: 0.3, output: 0.9 }
   };
-  const costs = costMap[model] ?? { input: 3, output: 15 };
-  return inputTokens / 1e6 * costs.input + outputTokens / 1e6 * costs.output;
+  if (costMap[model]) return costMap[model];
+  const m = model.toLowerCase();
+  if (m.includes("haiku") || m.includes("flash") || m.includes("mini") || m.includes("small") || m.includes("8b") || m.includes("7b") || m.includes("lite")) {
+    return { input: 0.15, output: 0.6 };
+  }
+  if (m.includes("opus") || m.includes("large") || m.includes("405b") || m.includes("gpt-4") && !m.includes("turbo") && !m.includes("o-") && !m.includes("mini")) {
+    return { input: 15, output: 75 };
+  }
+  return { input: 3, output: 15 };
+}
+var calculateCost = (inputTokens, outputTokens, model, usage, registry, pricing) => {
+  const costs = getPricing(model, registry, pricing);
+  const anthropicCacheRead = usage?.cache_read_input_tokens ?? 0;
+  const anthropicCacheWrite = usage?.cache_creation_input_tokens ?? 0;
+  const openaiCached = usage?.cached_tokens ?? 0;
+  const geminiCached = usage?.cached_content_token_count ?? 0;
+  const baseInputTokens = inputTokens - anthropicCacheRead - anthropicCacheWrite - openaiCached - geminiCached;
+  const inputCost = baseInputTokens / 1e6 * costs.input;
+  const outputCost = outputTokens / 1e6 * costs.output;
+  const anthropicCacheWriteCost = anthropicCacheWrite / 1e6 * costs.input * 1.25;
+  const anthropicCacheReadCost = anthropicCacheRead / 1e6 * costs.input * 0.1;
+  const openaiCachedCost = openaiCached / 1e6 * costs.input * 0.5;
+  const geminiCachedCost = geminiCached / 1e6 * costs.input * 0.25;
+  return inputCost + outputCost + anthropicCacheWriteCost + anthropicCacheReadCost + openaiCachedCost + geminiCachedCost;
 };
 // src/prompt-manager.ts
@@ -1614,13 +1707,14 @@ var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "syste
     )
   };
 });
-var toAnthropicTool = (tool) => ({
+var toAnthropicTool = (tool, cached = false) => ({
   name: tool.name,
   description: tool.description,
   input_schema: {
     type: "object",
     ...tool.inputSchema
-  }
+  },
+  ...cached ? { cache_control: { type: "ephemeral" } } : {}
 });
 var toEffectError = (error, provider) => {
   const err = error;
@@ -1673,11 +1767,13 @@ var AnthropicProviderLive = Layer3.effect(
             system: buildSystemParam(request.systemPrompt),
             messages: toAnthropicMessages(request.messages),
             stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
-            tools: request.tools?.map(toAnthropicTool)
+            tools: request.tools?.map(
+              (t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
+            )
           }),
           catch: (error) => toEffectError(error, "anthropic")
         });
-        return mapAnthropicResponse(response, model);
+        return mapAnthropicResponse(response, model, config.pricingRegistry);
       }).pipe(
         Effect4.retry(retryPolicy),
         Effect4.timeout("30 seconds"),
@@ -1701,10 +1797,24 @@ var AnthropicProviderLive = Layer3.effect(
             max_tokens: request.maxTokens ?? config.defaultMaxTokens,
             temperature: request.temperature ?? config.defaultTemperature,
             system: buildSystemParam(request.systemPrompt),
-            messages: toAnthropicMessages(request.messages)
+            messages: toAnthropicMessages(request.messages),
+            tools: request.tools?.map(
+              (t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
+            )
           });
-          stream.on("text", (text) => {
-            emit.single({ type: "text_delta", text });
+          stream.on("streamEvent", (event) => {
+            const e = event;
+            if (e.type === "content_block_delta") {
+              if (e.delta?.type === "text_delta" && e.delta.text) {
+                emit.single({ type: "text_delta", text: e.delta.text });
+              } else if (e.delta?.type === "input_json_delta" && e.delta.partial_json) {
+                emit.single({ type: "tool_use_delta", input: e.delta.partial_json });
+              }
+            } else if (e.type === "content_block_start") {
+              if (e.content_block?.type === "tool_use" && e.content_block.id && e.content_block.name) {
+                emit.single({ type: "tool_use_start", id: e.content_block.id, name: e.content_block.name });
+              }
+            }
           });
           stream.on("finalMessage", (message) => {
             const msg = message;
@@ -1721,7 +1831,12 @@ var AnthropicProviderLive = Layer3.effect(
                 estimatedCost: calculateCost(
                   msg.usage.input_tokens,
                   msg.usage.output_tokens,
-                  model
+                  model,
+                  {
+                    cache_creation_input_tokens: msg.usage.cache_creation_input_tokens,
+                    cache_read_input_tokens: msg.usage.cache_read_input_tokens
+                  },
+                  config.pricingRegistry
                 )
               }
             });
@@ -1867,11 +1982,17 @@ No markdown, no code fences, just raw JSON.`
         jsonSchemaEnforcement: false,
         prefillSupport: true,
         grammarConstraints: false
+      }),
+      capabilities: () => Effect4.succeed({
+        supportsToolCalling: true,
+        supportsStreaming: true,
+        supportsStructuredOutput: true,
+        supportsLogprobs: false
       })
     });
   })
 );
-var mapAnthropicResponse = (response, model) => {
+var mapAnthropicResponse = (response, model, registry) => {
   const textContent = response.content.filter(
     (b) => b.type === "text"
   ).map((b) => b.text).join("");
@@ -1893,7 +2014,12 @@ var mapAnthropicResponse = (response, model) => {
       estimatedCost: calculateCost(
         response.usage.input_tokens,
         response.usage.output_tokens,
-        model
+        model,
+        {
+          cache_creation_input_tokens: response.usage.cache_creation_input_tokens,
+          cache_read_input_tokens: response.usage.cache_read_input_tokens
+        },
+        registry
       )
     },
     model: response.model ?? model,
@@ -1911,6 +2037,28 @@ var toOpenAIMessages = (messages) => messages.map((m) => {
       content: m.content
     };
   }
+  if (m.role === "assistant" && typeof m.content !== "string") {
+    const blocks = m.content;
+    const textParts = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
+    const toolUseBlocks = blocks.filter(
+      (b) => b.type === "tool_use"
+    );
+    if (toolUseBlocks.length > 0) {
+      return {
+        role: "assistant",
+        content: textParts || "",
+        tool_calls: toolUseBlocks.map((tc) => ({
+          id: tc.id,
+          type: "function",
+          function: {
+            name: tc.name,
+            arguments: typeof tc.input === "string" ? tc.input : JSON.stringify(tc.input)
+          }
+        }))
+      };
+    }
+    return { role: "assistant", content: textParts };
+  }
   return {
     role: m.role,
     content: typeof m.content === "string" ? m.content : m.content.filter(
@@ -1933,12 +2081,49 @@ var toEffectError2 = (error, provider) => {
     cause: error
   });
 };
-var toOpenAITool = (tool) => ({
+var isStrictToolCallingSupported = (model) => {
+  const m = model.toLowerCase();
+  return m.includes("gpt-4o") && (m.includes("2024-08-06") || m.includes("2024-11-20") || !m.includes("2024-05-13")) || m.includes("gpt-4o-mini") || m.startsWith("o1") || m.startsWith("o3") || m.startsWith("o4");
+};
+var toStrictToolSchema = (schema) => {
+  if (!schema || typeof schema !== "object") return schema;
+  const newSchema = JSON.parse(JSON.stringify(schema));
+  if (newSchema.type === "object" && newSchema.properties) {
+    const originalRequired = new Set(newSchema.required ?? []);
+    newSchema.additionalProperties = false;
+    newSchema.required = Object.keys(newSchema.properties);
+    for (const key of Object.keys(newSchema.properties)) {
+      const prop = newSchema.properties[key];
+      if (typeof prop === "object" && prop !== null) {
+        delete prop.default;
+      }
+      if (!originalRequired.has(key) && prop && typeof prop === "object") {
+        if (prop.type && prop.type !== "null" && !prop.anyOf) {
+          prop.anyOf = [{ type: prop.type }, { type: "null" }];
+          delete prop.type;
+        }
+      }
+      if (prop.type === "object" && prop.properties) {
+        newSchema.properties[key] = toStrictToolSchema(prop);
+      } else if (prop.anyOf) {
+        prop.anyOf = prop.anyOf.map(
+          (variant) => variant && variant.type === "object" ? { ...variant, additionalProperties: false } : variant
+        );
+      }
+      if (prop.type === "array" && prop.items && prop.items.type === "object") {
+        newSchema.properties[key].items = toStrictToolSchema(prop.items);
+      }
+    }
+  }
+  return newSchema;
+};
+var toOpenAITool = (tool, strict) => ({
   type: "function",
   function: {
     name: tool.name,
     description: tool.description,
-    parameters: tool.inputSchema
+    parameters: strict ? toStrictToolSchema(tool.inputSchema) : tool.inputSchema,
+    strict: strict || void 0
   }
 });
 var OpenAIProviderLive = Layer4.effect(
@@ -1970,14 +2155,21 @@ var OpenAIProviderLive = Layer4.effect(
           messages,
           stop: request.stopSequences ? [...request.stopSequences] : void 0
         };
+        if (request.logprobs) {
+          requestBody.logprobs = true;
+          if (request.topLogprobs != null) {
+            requestBody.top_logprobs = request.topLogprobs;
+          }
+        }
         if (request.tools && request.tools.length > 0) {
-          requestBody.tools = request.tools.map(toOpenAITool);
+          const strict = isStrictToolCallingSupported(model);
+          requestBody.tools = request.tools.map((t) => toOpenAITool(t, strict));
         }
         const response = yield* Effect5.tryPromise({
           try: () => client.chat.completions.create(requestBody),
           catch: (error) => toEffectError2(error, "openai")
         });
-        return mapOpenAIResponse(response, model);
+        return mapOpenAIResponse(response, model, config.pricingRegistry);
       }).pipe(
         Effect5.retry(retryPolicy),
         Effect5.timeout("30 seconds"),
@@ -2009,38 +2201,71 @@ var OpenAIProviderLive = Layer4.effect(
                   }
                   return msgs;
                 })(),
-                stream: true
+                tools: request.tools && request.tools.length > 0 ? request.tools.map((t) => toOpenAITool(t, isStrictToolCallingSupported(model))) : void 0,
+                stream: true,
+                stream_options: { include_usage: true }
               });
               let fullContent = "";
+              const toolCallAccum = /* @__PURE__ */ new Map();
+              let finalUsage;
               for await (const chunk of stream) {
                 const delta = chunk.choices[0]?.delta?.content;
                 if (delta) {
                   fullContent += delta;
                   emit.single({ type: "text_delta", text: delta });
                 }
+                const toolDeltas = chunk.choices[0]?.delta?.tool_calls;
+                if (toolDeltas) {
+                  for (const tc of toolDeltas) {
+                    const existing = toolCallAccum.get(tc.index);
+                    if (existing) {
+                      if (tc.function?.arguments) existing.arguments += tc.function.arguments;
+                    } else {
+                      toolCallAccum.set(tc.index, {
+                        id: tc.id ?? "",
+                        name: tc.function?.name ?? "",
+                        arguments: tc.function?.arguments ?? ""
+                      });
+                      if (tc.id && tc.function?.name) {
+                        emit.single({ type: "tool_use_start", id: tc.id, name: tc.function.name });
+                      }
+                    }
+                    if (tc.function?.arguments) {
+                      emit.single({ type: "tool_use_delta", input: tc.function.arguments });
+                    }
+                  }
+                }
+                if (chunk.usage) {
+                  finalUsage = chunk.usage;
+                }
                 if (chunk.choices[0]?.finish_reason) {
                   emit.single({
                     type: "content_complete",
                     content: fullContent
                   });
-                  const inputTokens = chunk.usage?.prompt_tokens ?? 0;
-                  const outputTokens = chunk.usage?.completion_tokens ?? 0;
-                  emit.single({
-                    type: "usage",
-                    usage: {
-                      inputTokens,
-                      outputTokens,
-                      totalTokens: inputTokens + outputTokens,
-                      estimatedCost: calculateCost(
-                        inputTokens,
-                        outputTokens,
-                        model
-                      )
-                    }
-                  });
-                  emit.end();
                 }
               }
+              const inputTokens = finalUsage?.prompt_tokens ?? 0;
+              const outputTokens = finalUsage?.completion_tokens ?? 0;
+              const cacheUsage = {
+                cached_tokens: finalUsage?.prompt_tokens_details?.cached_tokens
+              };
+              emit.single({
+                type: "usage",
+                usage: {
+                  inputTokens,
+                  outputTokens,
+                  totalTokens: inputTokens + outputTokens,
+                  estimatedCost: calculateCost(
+                    inputTokens,
+                    outputTokens,
+                    model,
+                    cacheUsage,
+                    config.pricingRegistry
+                  )
+                }
+              });
+              emit.end();
             } catch (error) {
               const err = error;
               emit.fail(
@@ -2105,7 +2330,8 @@ ${schemaStr}`
           });
           const response = mapOpenAIResponse(
             completeResult,
-            model
+            model,
+            config.pricingRegistry
           );
           try {
             const parsed = JSON.parse(response.content);
@@ -2167,11 +2393,17 @@ ${schemaStr}`
         jsonSchemaEnforcement: true,
         prefillSupport: false,
         grammarConstraints: false
+      }),
+      capabilities: () => Effect5.succeed({
+        supportsToolCalling: true,
+        supportsStreaming: true,
+        supportsStructuredOutput: true,
+        supportsLogprobs: true
       })
     });
   })
 );
-var mapOpenAIResponse = (response, model) => {
+var mapOpenAIResponse = (response, model, registry) => {
   const message = response.choices[0]?.message;
   const content = message?.content ?? "";
   const rawToolCalls = message?.tool_calls;
@@ -2190,6 +2422,17 @@ var mapOpenAIResponse = (response, model) => {
       input
     };
   }) : void 0;
+  const rawLogprobs = response.choices[0]?.logprobs?.content;
+  const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
+    token: lp.token,
+    logprob: lp.logprob,
+    ...lp.top_logprobs ? {
+      topLogprobs: lp.top_logprobs.map((tlp) => ({
+        token: tlp.token,
+        logprob: tlp.logprob
+      }))
+    } : {}
+  })) : void 0;
   return {
     content,
     stopReason,
@@ -2200,11 +2443,16 @@ var mapOpenAIResponse = (response, model) => {
       estimatedCost: calculateCost(
         response.usage?.prompt_tokens ?? 0,
         response.usage?.completion_tokens ?? 0,
-        model
+        model,
+        {
+          cached_tokens: response.usage?.prompt_tokens_details?.cached_tokens
+        },
+        registry
       )
     },
     model: response.model ?? model,
-    toolCalls
+    toolCalls,
+    ...logprobs ? { logprobs } : {}
   };
 };
@@ -2216,7 +2464,7 @@ var PROVIDER_DEFAULT_MODELS = {
   anthropic: "claude-sonnet-4-20250514",
   openai: "gpt-4o",
   ollama: "cogito:14b",
-  gemini: "gemini-2.0-flash",
+  gemini: "gemini-2.5-flash",
   litellm: "gpt-4o",
   test: "test-model"
 };
@@ -2349,7 +2597,9 @@ var LocalProviderLive = Layer5.effect(
               options: {
                 temperature: request.temperature ?? config.defaultTemperature,
                 num_predict: request.maxTokens ?? config.defaultMaxTokens,
-                stop: request.stopSequences ? [...request.stopSequences] : void 0
+                stop: request.stopSequences ? [...request.stopSequences] : void 0,
+                ...request.logprobs ? { logprobs: true } : {},
+                ...request.topLogprobs != null ? { top_logprobs: request.topLogprobs } : {}
               }
             });
           },
@@ -2363,6 +2613,17 @@ var LocalProviderLive = Layer5.effect(
           response.message?.tool_calls
         );
         const hasToolCalls = toolCalls && toolCalls.length > 0;
+        const rawLogprobs = response.logprobs;
+        const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
+          token: lp.token,
+          logprob: lp.logprob,
+          ...lp.top_logprobs ? {
+            topLogprobs: lp.top_logprobs.map((tlp) => ({
+              token: tlp.token,
+              logprob: tlp.logprob
+            }))
+          } : {}
+        })) : void 0;
         return {
           content,
           stopReason: hasToolCalls ? "tool_use" : response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
@@ -2375,7 +2636,8 @@ var LocalProviderLive = Layer5.effect(
           },
           model: response.model ?? model,
           toolCalls,
-          ...thinkingContent ? { thinking: thinkingContent } : {}
+          ...thinkingContent ? { thinking: thinkingContent } : {},
+          ...logprobs ? { logprobs } : {}
         };
       }).pipe(
         Effect6.retry(retryPolicy),
@@ -2409,6 +2671,7 @@ var LocalProviderLive = Layer5.effect(
                 model,
                 config.thinking
               );
+              const wantLogprobs = request.logprobs ?? false;
               const stream = await client.chat({
                 model,
                 messages: msgs,
@@ -2418,10 +2681,13 @@ var LocalProviderLive = Layer5.effect(
                 keep_alive: "5m",
                 options: {
                   temperature: request.temperature ?? config.defaultTemperature,
-                  num_predict: request.maxTokens ?? config.defaultMaxTokens
+                  num_predict: request.maxTokens ?? config.defaultMaxTokens,
+                  ...wantLogprobs ? { logprobs: true } : {}
                 }
               });
               let fullContent = "";
+              const accumulatedLogprobs = [];
+              const accumulatedToolCalls = [];
               for await (const chunk of stream) {
                 if (chunk.message?.content) {
                   fullContent += chunk.message.content;
@@ -2430,11 +2696,53 @@ var LocalProviderLive = Layer5.effect(
                     text: chunk.message.content
                   });
                 }
+                if (chunk.message?.tool_calls && Array.isArray(chunk.message.tool_calls)) {
+                  for (const tc of chunk.message.tool_calls) {
+                    const toolCall = {
+                      id: `ollama-tc-${Date.now()}-${accumulatedToolCalls.length}`,
+                      name: tc.function.name,
+                      input: tc.function.arguments
+                    };
+                    accumulatedToolCalls.push(toolCall);
+                    emit.single({
+                      type: "tool_use_start",
+                      id: toolCall.id,
+                      name: toolCall.name
+                    });
+                    emit.single({
+                      type: "tool_use_delta",
+                      input: JSON.stringify(tc.function.arguments)
+                    });
+                  }
+                }
+                if (wantLogprobs) {
+                  const chunkLp = chunk.logprobs;
+                  if (Array.isArray(chunkLp)) {
+                    for (const lp of chunkLp) {
+                      accumulatedLogprobs.push({
+                        token: lp.token,
+                        logprob: lp.logprob,
+                        ...lp.top_logprobs ? { topLogprobs: lp.top_logprobs.map((t) => ({ token: t.token, logprob: t.logprob })) } : {}
+                      });
+                    }
+                  }
+                }
                 if (chunk.done) {
+                  const hasToolCalls = accumulatedToolCalls.length > 0;
+                  const doneReason = chunk.done_reason;
                   emit.single({
                     type: "content_complete",
-                    content: fullContent
+                    content: fullContent,
+                    ...hasToolCalls ? { stopReason: "tool_use" } : {
+                      stopReason: doneReason === "stop" ? "end_turn" : doneReason === "length" ? "max_tokens" : "end_turn"
+                    }
                   });
+                  if (accumulatedLogprobs.length > 0) {
+                    emit.single({
+                      type: "logprobs",
+                      logprobs: accumulatedLogprobs
+                    });
+                  }
                   emit.single({
                     type: "usage",
                     usage: {
@@ -2563,6 +2871,12 @@ No markdown, no code fences, just raw JSON.`
         jsonSchemaEnforcement: true,
         prefillSupport: false,
         grammarConstraints: true
+      }),
+      capabilities: () => Effect6.succeed({
+        supportsToolCalling: true,
+        supportsStreaming: true,
+        supportsStructuredOutput: true,
+        supportsLogprobs: false
       })
     });
   })
@@ -2579,7 +2893,7 @@ var toGeminiContents = (messages) => {
         role: "user",
         parts: [{
           functionResponse: {
-            name: "tool",
+            name: msg.toolName ?? "unknown_tool",
             response: { content: msg.content }
           }
         }]
@@ -2601,7 +2915,7 @@ var toGeminiContents = (messages) => {
         } else if (block.type === "tool_result") {
           parts.push({
             functionResponse: {
-              name: "tool",
+              name: block.name ?? "unknown_tool",
               response: { content: block.content }
             }
           });
@@ -2643,7 +2957,7 @@ var toEffectError3 = (error) => {
     cause: error
   });
 };
-var mapGeminiResponse = (response, model) => {
+var mapGeminiResponse = (response, model, registry) => {
   const toolCalls = response.functionCalls?.map((fc, i) => ({
     id: `call_${i}`,
     name: fc.name,
@@ -2658,7 +2972,15 @@ var mapGeminiResponse = (response, model) => {
       inputTokens,
       outputTokens,
       totalTokens: inputTokens + outputTokens,
-      estimatedCost: calculateCost(inputTokens, outputTokens, model)
+      estimatedCost: calculateCost(
+        inputTokens,
+        outputTokens,
+        model,
+        {
+          cached_content_token_count: response.usageMetadata?.cachedContentTokenCount
+        },
+        registry
+      )
     },
     model,
     toolCalls: toolCalls?.length ? toolCalls : void 0
@@ -2714,7 +3036,7 @@ var GeminiProviderLive = Layer6.effect(
           }),
           catch: toEffectError3
         });
-        return mapGeminiResponse(response, model);
+        return mapGeminiResponse(response, model, config.pricingRegistry);
       }).pipe(
         Effect7.retry(retryPolicy),
         Effect7.timeout("30 seconds"),
@@ -2746,30 +3068,56 @@ var GeminiProviderLive = Layer6.effect(
                 config: buildGeminiConfig({
                   maxTokens: request.maxTokens,
                   temperature: request.temperature,
-                  systemPrompt
+                  systemPrompt,
+                  tools: request.tools
                 })
               });
               let fullContent = "";
               let inputTokens = 0;
               let outputTokens = 0;
+              let cachedContentTokens = 0;
+              const accumulatedToolCalls = [];
               for await (const chunk of stream) {
                 if (chunk.text) {
                   emit.single({ type: "text_delta", text: chunk.text });
                   fullContent += chunk.text;
                 }
+                const fcs = chunk.functionCalls;
+                if (fcs && fcs.length > 0) {
+                  for (const fc of fcs) {
+                    const tcId = `gemini-tc-${Date.now()}-${accumulatedToolCalls.length}`;
+                    accumulatedToolCalls.push({ id: tcId, name: fc.name, input: fc.args });
+                    emit.single({ type: "tool_use_start", id: tcId, name: fc.name });
+                    emit.single({ type: "tool_use_delta", input: JSON.stringify(fc.args) });
+                  }
+                }
                 if (chunk.usageMetadata) {
                   inputTokens = chunk.usageMetadata.promptTokenCount ?? 0;
                   outputTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
+                  cachedContentTokens = chunk.usageMetadata.cachedContentTokenCount ?? 0;
                 }
               }
-              emit.single({ type: "content_complete", content: fullContent });
+              const hasToolCalls = accumulatedToolCalls.length > 0;
+              emit.single({
+                type: "content_complete",
+                content: fullContent,
+                ...hasToolCalls ? { stopReason: "tool_use", toolCalls: accumulatedToolCalls } : {}
+              });
               emit.single({
                 type: "usage",
                 usage: {
                   inputTokens,
                   outputTokens,
                   totalTokens: inputTokens + outputTokens,
-                  estimatedCost: calculateCost(inputTokens, outputTokens, model)
+                  estimatedCost: calculateCost(
+                    inputTokens,
+                    outputTokens,
+                    model,
+                    {
+                      cached_content_token_count: cachedContentTokens || void 0
+                    },
+                    config.pricingRegistry
+                  )
                 }
               });
               emit.end();
@@ -2831,7 +3179,7 @@ ${schemaStr}`
             }),
             catch: toEffectError3
           });
-          const mapped = mapGeminiResponse(response, model);
+          const mapped = mapGeminiResponse(response, model, config.pricingRegistry);
           try {
             const parsed = JSON.parse(mapped.content);
             const decoded = Schema5.decodeUnknownEither(
@@ -2884,6 +3232,12 @@ ${schemaStr}`
         jsonSchemaEnforcement: false,
         prefillSupport: false,
         grammarConstraints: false
+      }),
+      capabilities: () => Effect7.succeed({
+        supportsToolCalling: true,
+        supportsStreaming: true,
+        supportsStructuredOutput: true,
+        supportsLogprobs: false
       })
     });
   })
@@ -2929,7 +3283,7 @@ var toLiteLLMTool = (tool) => ({
     parameters: tool.inputSchema
   }
 });
-var mapLiteLLMResponse = (response, model) => {
+var mapLiteLLMResponse = (response, model, registry) => {
   const message = response.choices[0]?.message;
   const content = message?.content ?? "";
   const rawToolCalls = message?.tool_calls;
@@ -2954,7 +3308,14 @@ var mapLiteLLMResponse = (response, model) => {
       estimatedCost: calculateCost(
         response.usage?.prompt_tokens ?? 0,
         response.usage?.completion_tokens ?? 0,
-        model
+        model,
+        void 0,
+        registry,
+        // Prioritize costs returned directly from the proxy if available
+        response.usage?.input_cost !== void 0 && response.usage?.output_cost !== void 0 ? {
+          input: response.usage.input_cost / (response.usage.prompt_tokens || 1) * 1e6,
+          output: response.usage.output_cost / (response.usage.completion_tokens || 1) * 1e6
+        } : void 0
       )
     },
     model: response.model ?? model,
@@ -3008,7 +3369,11 @@ var LiteLLMProviderLive = Layer7.effect(
           try: () => liteLLMFetch(baseURL, "/chat/completions", requestBody, apiKey),
           catch: (error) => toEffectError4(error)
         });
-        return mapLiteLLMResponse(response, model);
+        return mapLiteLLMResponse(
+          response,
+          model,
+          config.pricingRegistry
+        );
       }).pipe(
         Effect8.retry(retryPolicy),
         Effect8.timeout("30 seconds"),
@@ -3094,7 +3459,9 @@ var LiteLLMProviderLive = Layer7.effect(
                           estimatedCost: calculateCost(
                             inputTokens,
                             outputTokens,
-                            model
+                            model,
+                            void 0,
+                            config.pricingRegistry
                           )
                         }
                       });
@@ -3165,7 +3532,8 @@ No markdown, no code fences, just raw JSON.`
           });
           const response = mapLiteLLMResponse(
             completeResult,
-            model
+            model,
+            config.pricingRegistry
           );
           try {
             const parsed = JSON.parse(response.content);
@@ -3229,6 +3597,12 @@ No markdown, no code fences, just raw JSON.`
         jsonSchemaEnforcement: false,
         prefillSupport: false,
         grammarConstraints: false
+      }),
+      capabilities: () => Effect8.succeed({
+        supportsToolCalling: true,
+        supportsStreaming: true,
+        supportsStructuredOutput: true,
+        supportsLogprobs: false
       })
     });
   })
@@ -3236,109 +3610,217 @@ No markdown, no code fences, just raw JSON.`
 // src/testing.ts
 import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
-var TestLLMService = (responses) => ({
-  complete: (request) => Effect9.gen(function* () {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
-    const searchText = `${content} ${systemPrompt}`;
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (pattern.length > 0 && searchText.includes(pattern)) {
+/**
+ * Build a synthetic usage record for the test provider.
+ * Token counts are approximated as one token per four characters (rounded up);
+ * the estimated cost is always zero.
+ *
+ * @param inputLen - Character length of the prompt text
+ * @param outputLen - Character length of the generated text
+ * @returns Object with inputTokens, outputTokens, totalTokens and estimatedCost
+ */
+function fakeUsage(inputLen, outputLen) {
+  const inputTokens = Math.ceil(inputLen / 4);
+  const outputTokens = Math.ceil(outputLen / 4);
+  return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, estimatedCost: 0 };
+}
+/**
+ * Produce the text that scenario guards are matched against: the last message's
+ * string content followed by the string system prompt, separated by one space
+ * and trimmed. Non-string content or a non-string system prompt contributes "".
+ *
+ * @param messages - Conversation messages; only the final one is inspected
+ * @param request - Request object; only `systemPrompt` is read
+ * @returns The combined, trimmed search text
+ */
+function extractSearchText(messages, request) {
+  const last = messages[messages.length - 1];
+  const parts = [
+    last && typeof last.content === "string" ? last.content : "",
+    typeof request.systemPrompt === "string" ? request.systemPrompt : ""
+  ];
+  return parts.join(" ").trim();
+}
+/**
+ * Pick the scenario turn that answers the current call.
+ * Scans forward from `callIndex.value`; a turn is accepted when it has no `match`
+ * guard or when the guard, compiled as a case-insensitive RegExp, matches `searchText`.
+ * On acceptance the cursor moves one past the accepted turn, clamped to the last index.
+ * When nothing is accepted, the last turn is returned and the cursor is left untouched.
+ *
+ * @param scenario - Ordered list of scripted turns
+ * @param callIndex - Mutable cursor of the form `{ value: number }`
+ * @param searchText - Text the guards are tested against
+ * @returns `{ turn, matchedIndex }`
+ */
+function resolveTurn(scenario, callIndex, searchText) {
+  const lastIndex = scenario.length - 1;
+  let i = callIndex.value;
+  while (i < scenario.length) {
+    const candidate = scenario[i];
+    const pattern = candidate.match;
+    const accepted = !pattern || new RegExp(pattern, "i").test(searchText);
+    if (accepted) {
+      callIndex.value = Math.min(i + 1, lastIndex);
+      return { turn: candidate, matchedIndex: i };
+    }
+    i += 1;
+  }
+  return { turn: scenario[lastIndex], matchedIndex: lastIndex };
+}
+/**
+ * Convert scripted tool-call specs into tool-call records.
+ * A spec without an `id` gets the generated id `call-<matchedIndex>-<position>`.
+ *
+ * @param specs - Array of `{ id?, name, args }` specs
+ * @param matchedIndex - Index of the scenario turn the specs came from
+ * @returns Array of `{ id, name, input }` records, in the same order as `specs`
+ */
+function buildToolCalls(specs, matchedIndex) {
+  return specs.map(({ id, name, args }, position) => {
+    const resolvedId = id ?? `call-${matchedIndex}-${position}`;
+    return { id: resolvedId, name, input: args };
+  });
+}
+var TestLLMService = (scenario) => {
+  const callIndex = { value: 0 };
+  return {
+    complete: (request) => Effect9.gen(function* () {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        throw new Error(turn.error);
+      }
+      if ("toolCall" in turn) {
         return {
-          content: response,
-          stopReason: "end_turn",
-          usage: {
-            inputTokens: Math.ceil(content.length / 4),
-            outputTokens: Math.ceil(response.length / 4),
-            totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
-            estimatedCost: 0
-          },
-          model: "test-model"
+          content: "",
+          stopReason: "tool_use",
+          usage: fakeUsage(searchText.length, 0),
+          model: "test-model",
+          toolCalls: buildToolCalls([turn.toolCall], matchedIndex)
         };
       }
-    }
-    return {
-      content: "Test response",
-      stopReason: "end_turn",
-      usage: {
-        inputTokens: 0,
-        outputTokens: 0,
-        totalTokens: 0,
-        estimatedCost: 0
-      },
-      model: "test-model"
-    };
-  }),
-  stream: (request) => {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
-    const searchText = `${content} ${systemPrompt}`;
-    let matchedResponse = "Test response";
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (pattern.length > 0 && searchText.includes(pattern)) {
-        matchedResponse = response;
-        break;
+      if ("toolCalls" in turn) {
+        return {
+          content: "",
+          stopReason: "tool_use",
+          usage: fakeUsage(searchText.length, 0),
+          model: "test-model",
+          toolCalls: buildToolCalls(turn.toolCalls, matchedIndex)
+        };
       }
-    }
-    const inputTokens = Math.ceil(content.length / 4);
-    const outputTokens = Math.ceil(matchedResponse.length / 4);
-    return Effect9.succeed(
-      Stream6.make(
-        {
-          type: "text_delta",
-          text: matchedResponse
-        },
-        {
-          type: "content_complete",
-          content: matchedResponse
-        },
-        {
-          type: "usage",
-          usage: {
-            inputTokens,
-            outputTokens,
-            totalTokens: inputTokens + outputTokens,
-            estimatedCost: 0
+      const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
+      return {
+        content,
+        stopReason: "end_turn",
+        usage: fakeUsage(searchText.length, content.length),
+        model: "test-model"
+      };
+    }),
+    stream: (request) => {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        return Effect9.succeed(
+          Stream6.make(
+            { type: "error", error: turn.error }
+          )
+        );
+      }
+      const specs = "toolCall" in turn ? [turn.toolCall] : "toolCalls" in turn ? turn.toolCalls : null;
+      if (specs) {
+        const events = [
+          ...specs.flatMap((spec, i) => [
+            {
+              type: "tool_use_start",
+              id: spec.id ?? `call-${matchedIndex}-${i}`,
+              name: spec.name
+            },
+            {
+              type: "tool_use_delta",
+              input: JSON.stringify(spec.args)
+            }
+          ]),
+          { type: "content_complete", content: "" },
+          { type: "usage", usage: fakeUsage(searchText.length, 0) }
+        ];
+        return Effect9.succeed(
+          Stream6.fromIterable(events)
+        );
+      }
+      const content = "json" in turn ? JSON.stringify(turn.json) : "text" in turn ? turn.text : "";
+      const inputTokens = Math.ceil(searchText.length / 4);
+      const outputTokens = Math.ceil(content.length / 4);
+      return Effect9.succeed(
+        Stream6.make(
+          { type: "text_delta", text: content },
+          { type: "content_complete", content },
+          {
+            type: "usage",
+            usage: {
+              inputTokens,
+              outputTokens,
+              totalTokens: inputTokens + outputTokens,
+              estimatedCost: 0
+            }
           }
-        }
+        )
+      );
+    },
+    completeStructured: (request) => Effect9.gen(function* () {
+      const searchText = extractSearchText(request.messages, request);
+      const { turn } = resolveTurn(scenario, callIndex, searchText);
+      if ("error" in turn) {
+        throw new Error(turn.error);
+      }
+      if ("json" in turn) {
+        return turn.json;
+      }
+      const responseContent = "text" in turn ? turn.text : "{}";
+      const parsed = JSON.parse(responseContent);
+      return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
+    }),
+    embed: (texts) => Effect9.succeed(
+      texts.map(() => new Array(768).fill(0).map(() => Math.random()))
+    ),
+    countTokens: (messages) => Effect9.succeed(
+      messages.reduce(
+        (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
+        0
       )
-    );
-  },
-  completeStructured: (request) => Effect9.gen(function* () {
-    const lastMessage = request.messages[request.messages.length - 1];
-    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
-    let responseContent = "Test response";
-    for (const [pattern, response] of Object.entries(responses)) {
-      if (content.includes(pattern)) {
-        responseContent = response;
-        break;
+    ),
+    getModelConfig: () => Effect9.succeed({
+      provider: "anthropic",
+      model: "test-model"
+    }),
+    getStructuredOutputCapabilities: () => Effect9.succeed({
+      nativeJsonMode: true,
+      jsonSchemaEnforcement: false,
+      prefillSupport: false,
+      grammarConstraints: false
+    }),
+    capabilities: () => Effect9.succeed({
+      ...DEFAULT_CAPABILITIES,
+      supportsToolCalling: true,
+      // Test provider emits native FC stream events (tool_use_start/tool_use_delta)
+      supportsStreaming: true
+    })
+  };
+};
+var TestLLMServiceLayer = (scenario = [{ text: "" }]) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(scenario)));
+// src/pricing.ts
+import { Effect as Effect10 } from "effect";
+var openRouterPricingProvider = {
+  fetchPricing: () => Effect10.gen(function* () {
+    const res = yield* Effect10.tryPromise({
+      try: () => fetch("https://openrouter.ai/api/v1/models"),
+      catch: (e) => new Error(`Fetch failed: ${e}`)
+    });
+    if (!res.ok) {
+      return yield* Effect10.fail(new Error(`OpenRouter API returned ${res.status}`));
+    }
+    const json = yield* Effect10.tryPromise({
+      try: () => res.json(),
+      catch: (e) => new Error(`JSON parse failed: ${e}`)
+    });
+    const registry = {};
+    for (const model of json.data) {
+      registry[model.id] = {
+        input: parseFloat(model.pricing.prompt) * 1e6,
+        output: parseFloat(model.pricing.completion) * 1e6
+      };
+      const shortName = model.id.split("/").pop();
+      if (shortName && !registry[shortName]) {
+        registry[shortName] = registry[model.id];
       }
     }
-    const parsed = JSON.parse(responseContent);
-    return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
-  }),
-  embed: (texts) => Effect9.succeed(
-    texts.map(() => new Array(768).fill(0).map(() => Math.random()))
-  ),
-  countTokens: (messages) => Effect9.succeed(
-    messages.reduce(
-      (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
-      0
-    )
-  ),
-  getModelConfig: () => Effect9.succeed({
-    provider: "anthropic",
-    model: "test-model"
-  }),
-  getStructuredOutputCapabilities: () => Effect9.succeed({
-    nativeJsonMode: true,
-    jsonSchemaEnforcement: false,
-    prefillSupport: false,
-    grammarConstraints: false
+    return registry;
+  })
+};
+var urlPricingProvider = (url) => ({
+  fetchPricing: () => Effect10.gen(function* () {
+    const res = yield* Effect10.tryPromise({
+      try: () => fetch(url),
+      catch: (e) => new Error(`Fetch failed: ${e}`)
+    });
+    if (!res.ok) {
+      return yield* Effect10.fail(new Error(`Custom pricing URL returned ${res.status}`));
+    }
+    const json = yield* Effect10.tryPromise({
+      try: () => res.json(),
+      catch: (e) => new Error(`JSON parse failed: ${e}`)
+    });
+    const registry = {};
+    for (const [key, value] of Object.entries(json)) {
+      registry[key] = {
+        input: Number(value.input),
+        output: Number(value.output)
+      };
+    }
+    return registry;
   })
 });
-var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
 // src/structured-output.ts
 import { Schema as Schema8 } from "effect";
@@ -3405,10 +3887,10 @@ var ComplexityAnalysisSchema = Schema8.Struct({
 });
 // src/runtime.ts
-import { Effect as Effect12, Layer as Layer9 } from "effect";
+import { Effect as Effect13, Layer as Layer9 } from "effect";
 // src/embedding-cache.ts
-import { Effect as Effect10 } from "effect";
+import { Effect as Effect11 } from "effect";
 var MAX_ENTRIES = 5e3;
 var makeEmbeddingCache = (underlying) => {
   const caches = /* @__PURE__ */ new Map();
@@ -3432,7 +3914,7 @@ var makeEmbeddingCache = (underlying) => {
     }
   };
   return {
-    embed: (texts, model) => Effect10.gen(function* () {
+    embed: (texts, model) => Effect11.gen(function* () {
       const modelKey = model ?? "__default__";
       const cache = getModelCache(modelKey);
       const results = new Array(texts.length);
@@ -3472,7 +3954,7 @@ var makeEmbeddingCache = (underlying) => {
 };
 // src/circuit-breaker.ts
-import { Effect as Effect11 } from "effect";
+import { Effect as Effect12 } from "effect";
 var makeCircuitBreaker = (config = {}) => {
   const { failureThreshold, cooldownMs } = {
     ...defaultCircuitBreakerConfig,
@@ -3493,12 +3975,12 @@ var makeCircuitBreaker = (config = {}) => {
     }
   };
   return {
-    protect: (effect) => Effect11.gen(function* () {
+    protect: (effect) => Effect12.gen(function* () {
       if (currentState === "open") {
         if (Date.now() - openedAt >= cooldownMs) {
           currentState = "half_open";
         } else {
-          return yield* Effect11.fail(
+          return yield* Effect12.fail(
             new LLMError({
               message: `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`,
               provider: "custom",
@@ -3507,13 +3989,13 @@ var makeCircuitBreaker = (config = {}) => {
           );
         }
       }
-      const result = yield* Effect11.exit(effect);
+      const result = yield* Effect12.exit(effect);
       if (result._tag === "Success") {
         onSuccess();
         return result.value;
       }
       onFailure();
-      return yield* Effect11.failCause(result.cause);
+      return yield* Effect12.failCause(result.cause);
     }),
     state: () => currentState,
     reset: () => {
@@ -3527,7 +4009,7 @@ var makeCircuitBreaker = (config = {}) => {
 // src/runtime.ts
 var EmbeddingCacheLayer = Layer9.effect(
   LLMService,
-  Effect12.gen(function* () {
+  Effect13.gen(function* () {
     const llm = yield* LLMService;
     const cache = makeEmbeddingCache(llm.embed);
     return LLMService.of({ ...llm, embed: cache.embed });
@@ -3535,7 +4017,7 @@ var EmbeddingCacheLayer = Layer9.effect(
 );
 var makeCircuitBreakerLayer = (config) => Layer9.effect(
   LLMService,
-  Effect12.gen(function* () {
+  Effect13.gen(function* () {
     const llm = yield* LLMService;
     const breaker = makeCircuitBreaker(config);
     return LLMService.of({
@@ -3545,10 +4027,10 @@ var makeCircuitBreakerLayer = (config) => Layer9.effect(
     });
   })
 );
-var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
+var createLLMProviderLayer = (provider = "anthropic", testScenario, model, modelParams, circuitBreaker, pricingRegistry) => {
   if (provider === "test") {
     return Layer9.mergeAll(
-      TestLLMServiceLayer(testResponses ?? {}),
+      TestLLMServiceLayer(testScenario ?? [{ text: "" }]),
       PromptManagerLive
     );
   }
@@ -3557,6 +4039,7 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
   if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
   if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
   if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
+  if (pricingRegistry) configOverrides.pricingRegistry = pricingRegistry;
   const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
   const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
   const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
@@ -3577,13 +4060,331 @@ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
     PromptManagerLive
   );
 };
+// src/rate-limiter.ts
+import { Effect as Effect14 } from "effect";
+// Limits applied when the caller does not override them.
+var DEFAULT_CONFIG = {
+  requestsPerMinute: 60,
+  tokensPerMinute: 1e5,
+  maxConcurrent: 10
+};
+/**
+ * Create an in-memory rate limiter with a 60-second sliding window.
+ *
+ * `acquire(messages)` loops until all three limits allow the request:
+ * concurrent in-flight requests, requests per window, and estimated tokens per window.
+ * It then records the request and increments the in-flight counter.
+ * `release()` decrements the in-flight counter (never below zero).
+ *
+ * @param config - Partial overrides for requestsPerMinute, tokensPerMinute and maxConcurrent
+ * @returns Object with acquire, release, concurrentCount, windowRequestCount and windowTokenCount
+ */
+var makeRateLimiter = (config = {}) => {
+  const limits = { ...DEFAULT_CONFIG, ...config };
+  const WINDOW_MS = 6e4;
+  // Admitted requests, oldest first; each entry is { ts, tokens }.
+  const admitted = [];
+  let inFlight = 0;
+  // Drop entries that are a full window (or more) older than `now`.
+  const dropExpired = (now) => {
+    const cutoff = now - WINDOW_MS;
+    while (admitted.length > 0 && admitted[0].ts <= cutoff) {
+      admitted.shift();
+    }
+  };
+  // Sum of the estimated tokens of every entry still in the window.
+  const tokensInWindow = () => {
+    let total = 0;
+    for (const entry of admitted) {
+      total += entry.tokens;
+    }
+    return total;
+  };
+  // Milliseconds until the oldest entry leaves the window.
+  const msUntilOldestExpires = (now) => admitted[0].ts + WINDOW_MS - now;
+  return {
+    acquire: (messages) => Effect14.gen(function* () {
+      const estimatedTokens = messages ? yield* estimateTokenCount(messages) : 0;
+      for (; ; ) {
+        const now = Date.now();
+        dropExpired(now);
+        // Concurrency limit: poll until a slot frees up.
+        if (inFlight >= limits.maxConcurrent) {
+          yield* Effect14.sleep("100 millis");
+          continue;
+        }
+        // Request-count limit: wait for the oldest entry to expire.
+        if (admitted.length >= limits.requestsPerMinute) {
+          const waitMs = msUntilOldestExpires(now);
+          if (waitMs > 0) {
+            yield* Effect14.sleep(`${waitMs} millis`);
+            continue;
+          }
+        }
+        // Token limit: only enforced when the window is non-empty, so a single
+        // oversized request is still admitted into an empty window.
+        const overTokenBudget = estimatedTokens > 0 && tokensInWindow() + estimatedTokens > limits.tokensPerMinute && admitted.length > 0;
+        if (overTokenBudget) {
+          const waitMs = msUntilOldestExpires(now);
+          if (waitMs > 0) {
+            yield* Effect14.sleep(`${waitMs} millis`);
+            continue;
+          }
+        }
+        admitted.push({ ts: now, tokens: estimatedTokens });
+        inFlight++;
+        return;
+      }
+    }),
+    release: () => {
+      if (inFlight > 0) inFlight--;
+    },
+    concurrentCount: () => inFlight,
+    windowRequestCount: () => {
+      dropExpired(Date.now());
+      return admitted.length;
+    },
+    windowTokenCount: () => {
+      dropExpired(Date.now());
+      return tokensInWindow();
+    }
+  };
+};
+// src/rate-limited-provider.ts
+import { Effect as Effect15, Layer as Layer10 } from "effect";
+/**
+ * Layer that wraps the ambient LLMService with a rate limiter.
+ *
+ * `complete`, `stream` and `completeStructured` first acquire a limiter slot and
+ * always give it back afterwards. The release is attached with `Effect.ensuring`
+ * so it runs on success, failure and interruption. A `try/finally` inside
+ * `Effect.gen` is not a reliable finalizer: a failed yielded effect does not
+ * resume the generator, so the slot could leak and eventually stall `acquire`.
+ *
+ * NOTE: for `stream`, the slot is released as soon as the stream value has been
+ * produced, not when the stream has been fully consumed.
+ *
+ * @param config - Partial rate-limiter configuration passed to makeRateLimiter
+ * @returns A Layer providing a rate-limited LLMService
+ */
+var makeRateLimitedProvider = (config = {}) => Layer10.effect(
+  LLMService,
+  Effect15.gen(function* () {
+    const svc = yield* LLMService;
+    const limiter = makeRateLimiter(config);
+    const releaseSlot = Effect15.sync(() => limiter.release());
+    // Acquire a slot, run the (lazily built) call, and release the slot no matter how the call ends.
+    const limited = (messages, call) => Effect15.flatMap(
+      limiter.acquire(messages),
+      () => Effect15.ensuring(Effect15.suspend(call), releaseSlot)
+    );
+    return {
+      complete: (req) => limited(req.messages, () => svc.complete(req)),
+      stream: (req) => limited(req.messages, () => svc.stream(req)),
+      completeStructured: (req) => limited(req.messages, () => svc.completeStructured(req)),
+      // Passthrough — embedding, token counting, config, and capabilities are not rate-limited
+      embed: svc.embed,
+      countTokens: svc.countTokens,
+      getModelConfig: svc.getModelConfig,
+      getStructuredOutputCapabilities: svc.getStructuredOutputCapabilities,
+      capabilities: svc.capabilities
+    };
+  })
+);
+// src/fallback-chain.ts
+/**
+ * Tracks provider errors and rate limits and decides which provider/model to use next.
+ * Providers and models are walked forward only; the chain never moves back.
+ */
+var FallbackChain = class {
+  /**
+   * @param config - Chain configuration: `providers` (ordered list), optional `models`
+   *   (ordered list) and optional `errorThreshold` (defaults to 3)
+   */
+  constructor(config) {
+    this.config = config;
+    this.threshold = config.errorThreshold ?? 3;
+  }
+  /** Error count per provider. */
+  errorCounts = /* @__PURE__ */ new Map();
+  /** Current index in the providers list. */
+  currentProviderIndex = 0;
+  /** Current index in the models list. */
+  currentModelIndex = 0;
+  /** Threshold for switching to next provider. */
+  threshold;
+  /**
+   * Record an error for the given provider.
+   * Increments that provider's error count. The chain advances to the next provider only
+   * when the erroring provider is the one currently in use and its count has reached the
+   * threshold. A late error reported for a provider the chain has already left must not
+   * skip over a fallback that has not failed.
+   *
+   * @param provider - Provider name that errored
+   */
+  recordError(provider) {
+    const count = (this.errorCounts.get(provider) ?? 0) + 1;
+    this.errorCounts.set(provider, count);
+    const isActiveProvider = provider === this.config.providers[this.currentProviderIndex];
+    const hasNextProvider = this.currentProviderIndex < this.config.providers.length - 1;
+    if (isActiveProvider && count >= this.threshold && hasNextProvider) {
+      this.currentProviderIndex++;
+    }
+  }
+  /**
+   * Record a rate limit error (429) for the given provider.
+   * Falls back to the next model in the chain, if one is configured.
+   *
+   * @param _provider - Provider name that was rate limited (unused)
+   */
+  recordRateLimit(_provider) {
+    if (this.config.models && this.currentModelIndex < this.config.models.length - 1) {
+      this.currentModelIndex++;
+    }
+  }
+  /**
+   * Record a successful call for the given provider.
+   * Resets the error count for that provider.
+   *
+   * @param provider - Provider name that succeeded
+   */
+  recordSuccess(provider) {
+    this.errorCounts.set(provider, 0);
+  }
+  /**
+   * Get the currently active provider.
+   *
+   * @returns Name of the provider to use
+   * @throws Error when the providers list has no entry at the current index
+   */
+  currentProvider() {
+    const provider = this.config.providers[this.currentProviderIndex];
+    if (!provider) {
+      throw new Error(`FallbackChain: Invalid provider index ${this.currentProviderIndex}`);
+    }
+    return provider;
+  }
+  /**
+   * Get the currently active model.
+   *
+   * @returns Name of the model to use, or undefined if no models configured
+   */
+  currentModel() {
+    return this.config.models?.[this.currentModelIndex];
+  }
+  /**
+   * Check if there are more fallbacks available (provider or model).
+   *
+   * @returns true if there are unused fallback providers or models, false if all exhausted
+   */
+  hasFallback() {
+    const hasProviderFallback = this.currentProviderIndex < this.config.providers.length - 1;
+    const hasModelFallback = this.config.models !== void 0 && this.currentModelIndex < this.config.models.length - 1;
+    return hasProviderFallback || hasModelFallback;
+  }
+};
+// src/validation.ts
+/**
+ * Repair a message list so it does not contain empty content or orphaned tool results.
+ *
+ * - assistant: tool-call ids (`tool_calls` or `toolCalls`) are remembered; string and
+ *   array content is kept as-is, any other content becomes "".
+ * - tool: dropped when its call id (`tool_call_id` or `toolCallId`) was not issued by an
+ *   earlier assistant message; kept when it has no call id.
+ * - user/system: blank strings, empty arrays and other missing content become "...".
+ *
+ * Array (content-block) content is preserved. Only genuinely empty content is replaced,
+ * so multi-part messages such as images are not wiped.
+ *
+ * @param messages - Messages in conversation order
+ * @returns The same array when it is empty, otherwise a new repaired array
+ */
+function validateAndRepairMessages(messages) {
+  if (messages.length === 0) return messages;
+  const repaired = [];
+  const toolCallIds = /* @__PURE__ */ new Set();
+  for (const msg of messages) {
+    if (msg.role === "assistant") {
+      const toolCalls = msg.tool_calls ?? msg.toolCalls ?? [];
+      for (const tc of toolCalls) {
+        if (tc.id) toolCallIds.add(tc.id);
+      }
+      const keepContent = typeof msg.content === "string" || Array.isArray(msg.content);
+      repaired.push({ ...msg, content: keepContent ? msg.content : "" });
+      continue;
+    }
+    if (msg.role === "tool") {
+      const callId = msg.tool_call_id ?? msg.toolCallId;
+      // Orphaned result: no earlier assistant message issued this call id.
+      if (callId && !toolCallIds.has(callId)) {
+        continue;
+      }
+      repaired.push(msg);
+      continue;
+    }
+    if (msg.role === "user" || msg.role === "system") {
+      const hasText = typeof msg.content === "string" && msg.content.trim() !== "";
+      const hasBlocks = Array.isArray(msg.content) && msg.content.length > 0;
+      if (!hasText && !hasBlocks) {
+        repaired.push({ ...msg, content: "..." });
+        continue;
+      }
+    }
+    repaired.push(msg);
+  }
+  return repaired;
+}
+// src/adapter.ts
+/**
+ * Adapter used when no tier-specific adapter applies. It only provides a continuation hint.
+ */
+var defaultAdapter = {
+  /**
+   * @returns A reminder listing the tools still to be called (with the remaining iteration
+   *   count once fewer than four iterations remain), a "final answer" prompt when every
+   *   required tool has been called and at least one tool was used, or undefined otherwise
+   */
+  continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
+    if (missingTools.length > 0) {
+      const toolList = missingTools.join(", ");
+      let urgency = "";
+      if (iteration >= maxIterations - 3) {
+        urgency = ` You have ${maxIterations - iteration} iterations left.`;
+      }
+      return `You must still call: ${toolList}. Call the next required tool now.${urgency}`;
+    }
+    if (toolsUsed.size > 0) {
+      return "You have completed all required tool calls. Now synthesize the results and provide your FINAL ANSWER.";
+    }
+    return void 0;
+  }
+};
+/**
+ * Adapter for the "local" tier. Every hook except continuationHint returns undefined for
+ * any other tier; continuationHint does not look at the tier at all.
+ */
+var localModelAdapter = {
+  /** Appends a multi-step reminder to the base system prompt. */
+  systemPromptPatch(basePrompt, tier) {
+    const reminder = "\n\nIMPORTANT: When given a multi-step task, complete ALL steps in sequence. After gathering information, immediately proceed to the next step. Never stop after only searching \u2014 always produce the deliverable.";
+    return tier === "local" ? basePrompt + reminder : void 0;
+  },
+  /** Rewrites the task as a numbered list of required tool calls. */
+  taskFraming({ task, requiredTools, tier }) {
+    if (tier !== "local") return void 0;
+    if (requiredTools.length === 0) return void 0;
+    const steps = requiredTools.map((toolName, index) => `${index + 1}. Call ${toolName}`).join("\n");
+    return `${task}\nComplete these steps in order:\n${steps}\nDo not stop until all steps are done.`;
+  },
+  /** Lists the required tools and insists that all of them are called. */
+  toolGuidance({ requiredTools, tier }) {
+    if (tier !== "local") return void 0;
+    if (requiredTools.length === 0) return void 0;
+    return `\nRequired tools for this task: ${requiredTools.join(", ")}. You MUST call all of them before giving a final answer.`;
+  },
+  /**
+   * Nudges the model toward the next missing tool. After a search/http tool it steers
+   * toward the first missing write/file tool, if there is one.
+   */
+  continuationHint({ toolsUsed, missingTools, iteration, maxIterations, lastToolName }) {
+    if (missingTools.length === 0) return void 0;
+    let urgency = "";
+    if (iteration >= maxIterations - 2) {
+      urgency = " This is urgent \u2014 you are running low on iterations.";
+    }
+    const justResearched = lastToolName && (lastToolName.includes("search") || lastToolName.includes("http"));
+    if (justResearched) {
+      const writeTools = missingTools.filter((t) => t.includes("write") || t.includes("file"));
+      if (writeTools.length > 0) {
+        return `You have gathered research data. Synthesize the findings and call ${writeTools[0]} to save the output.${urgency} Do NOT search again.`;
+      }
+    }
+    return missingTools.length === 1
+      ? `Your next step: call ${missingTools[0]}. You have all the information you need.${urgency}`
+      : `Complete these steps in order: ${missingTools.join(" \u2192 ")}.${urgency} Proceed with the first one now.`;
+  },
+  /** Suggests a recovery path after a tool error; 404 takes precedence over timeout. */
+  errorRecovery({ toolName, errorContent, missingTools, tier }) {
+    if (tier !== "local") return void 0;
+    const isNotFound = errorContent.includes("404") || errorContent.includes("Not Found");
+    const isTimeout = errorContent.toLowerCase().includes("timeout");
+    if (isNotFound) {
+      const stillNeeded = missingTools.length > 0 ? ` You still need to call: ${missingTools.join(", ")}.` : "";
+      return `${toolName} returned 404 \u2014 that URL doesn't exist. Try a different URL or use web-search to find the correct one.${stillNeeded}`;
+    }
+    return isTimeout
+      ? `${toolName} timed out. Try again with a simpler request, or skip this step and proceed with what you have.`
+      : `${toolName} failed. Try an alternative approach or use a different tool to get the information you need.`;
+  },
+  /** Tells the model to stop researching and call the first missing output tool. */
+  synthesisPrompt({ missingOutputTools, observationCount, tier }) {
+    if (tier !== "local") return void 0;
+    if (missingOutputTools.length === 0) return void 0;
+    const plural = observationCount !== 1 ? "s" : "";
+    return `You have gathered ${observationCount} piece${plural} of information. That is enough. Do NOT search again. Now call ${missingOutputTools[0]} to produce the final output. Synthesize everything you have learned into a complete, well-structured response.`;
+  },
+  /** Flags required tools not yet used, otherwise asks for a self-review of the answer. */
+  qualityCheck({ task, requiredTools, toolsUsed, tier }) {
+    if (tier !== "local") return void 0;
+    const unmet = requiredTools.filter((t) => !toolsUsed.has(t));
+    if (unmet.length === 0) {
+      return `Review your answer: does it fully address the task "${task.slice(0, 120)}"? If yes, give it. If not, complete the missing parts first.`;
+    }
+    return `Before finishing: you have not yet called ${unmet.join(", ")}. Call ${unmet[0]} now.`;
+  }
+};
+/**
+ * Adapter for the "mid" tier: terse continuation hints plus a short synthesis prompt.
+ */
+var midModelAdapter = {
+  /**
+   * @returns A terse list of tools still needed (with the remaining step count once fewer
+   *   than three remain), a "final answer" prompt when all required tools were called and
+   *   at least one tool was used, or undefined otherwise
+   */
+  continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
+    if (missingTools.length > 0) {
+      let urgency = "";
+      if (iteration >= maxIterations - 2) {
+        urgency = ` (${maxIterations - iteration} steps left)`;
+      }
+      return `Still needed: ${missingTools.join(", ")}. Call the next one now.${urgency}`;
+    }
+    if (toolsUsed.size > 0) {
+      return "All required tools called. Synthesize and give your final answer.";
+    }
+    return void 0;
+  },
+  /** Points the model at the first missing output tool; undefined outside the "mid" tier. */
+  synthesisPrompt({ missingOutputTools, tier }) {
+    if (tier !== "mid") return void 0;
+    if (missingOutputTools.length === 0) return void 0;
+    return `Research complete. Now call ${missingOutputTools[0]} to produce the output.`;
+  }
+};
+/**
+ * Choose the adapter for a model tier.
+ *
+ * @param _capabilities - Currently unused
+ * @param tier - "local" and "mid" have dedicated adapters; anything else gets the default
+ * @returns The adapter object for the tier
+ */
+function selectAdapter(_capabilities, tier) {
+  switch (tier) {
+    case "local":
+      return localModelAdapter;
+    case "mid":
+      return midModelAdapter;
+    default:
+      return defaultAdapter;
+  }
+}
+/**
+ * Placeholder: no strategy recommendation is made for any tier.
+ * All three parameters are currently unused.
+ *
+ * @returns Always undefined
+ */
+function recommendStrategyForTier(_tier, _configuredStrategy, _requiredTools) {
+  return undefined;
+}
 export {
   AnthropicProviderLive,
   CacheControlSchema,
   CompletionResponseSchema,
   ComplexityAnalysisSchema,
+  DEFAULT_CAPABILITIES,
   DefaultEmbeddingConfig,
   EmbeddingConfigSchema,
+  FallbackChain,
   GeminiProviderLive,
   ImageContentBlockSchema,
   ImageSourceSchema,
@@ -3621,13 +4422,22 @@ export {
   calculateCost,
   createLLMProviderLayer,
   createLLMProviderLayerWithConfig,
+  defaultAdapter,
   defaultCircuitBreakerConfig,
   estimateTokenCount,
   getProviderDefaultModel,
   llmConfigFromEnv,
+  localModelAdapter,
   makeCacheable,
   makeCircuitBreaker,
   makeEmbeddingCache,
-  retryPolicy
+  makeRateLimitedProvider,
+  makeRateLimiter,
+  openRouterPricingProvider,
+  recommendStrategyForTier,
+  retryPolicy,
+  selectAdapter,
+  urlPricingProvider,
+  validateAndRepairMessages
 };
 //# sourceMappingURL=index.js.map