npm - @reactive-agents/llm-provider - Version diff - 0.5.5 → 0.6.0 - Mend

@reactive-agents/llm-provider 0.5.5 → 0.6.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.d.ts (changed)

@@ -673,6 +673,8 @@ declare const CompletionResponseSchema: Schema.Struct<{
         /** Tool input parameters (arbitrary JSON-compatible object) */
         input: typeof Schema.Unknown;
     }>>>;
+    /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
+    thinking: Schema.optional<typeof Schema.String>;
 }>;
 /**
  * LLM response to a completion request.
@@ -804,6 +806,20 @@ type ObservabilityVerbosity =
 "metadata"
 /** Capture complete request/response payloads — higher overhead, useful for debugging. */
  | "full";
+/**
+ * Provider-reported capabilities for structured JSON output.
+ * Used by the structured output pipeline to select the optimal extraction strategy.
+ */
+type StructuredOutputCapabilities = {
+    /** Provider supports forcing JSON-only output (OpenAI, Gemini, Ollama) */
+    readonly nativeJsonMode: boolean;
+    /** Provider can enforce a JSON Schema on the output (OpenAI structured outputs) */
+    readonly jsonSchemaEnforcement: boolean;
+    /** Provider supports assistant message prefill to start response with "{" (Anthropic) */
+    readonly prefillSupport: boolean;
+    /** Provider supports GBNF grammar constraints for exact schema matching (Ollama/llama.cpp) */
+    readonly grammarConstraints: boolean;
+};
 declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
     readonly _tag: "LLMError";
@@ -904,6 +920,11 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
      * Get current model configuration.
      */
     readonly getModelConfig: () => Effect.Effect<ModelConfig, never>;
+    /**
+     * Report structured output capabilities for this provider.
+     * Used by the structured output pipeline to select optimal JSON extraction strategy.
+     */
+    readonly getStructuredOutputCapabilities: () => Effect.Effect<StructuredOutputCapabilities, never>;
 }>;
 /**
  * Core LLM service — all LLM interactions go through this.
@@ -992,6 +1013,15 @@ declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
      * @default 30000 (30 seconds)
      */
     readonly timeoutMs: number;
+    /**
+     * Enable/disable thinking mode for thinking-capable models.
+     * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
+     * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
+     * - `undefined` — Auto-detect based on model capabilities (Ollama only)
+     *
+     * @default undefined (auto-detect)
+     */
+    readonly thinking?: boolean;
     /**
      * Default maximum output tokens for LLM responses.
      * Used if a CompletionRequest does not specify maxTokens.
@@ -1155,6 +1185,15 @@ declare const llmConfigFromEnv: {
      * @default 30000 (30 seconds)
      */
     readonly timeoutMs: number;
+    /**
+     * Enable/disable thinking mode for thinking-capable models.
+     * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
+     * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
+     * - `undefined` — Auto-detect based on model capabilities (Ollama only)
+     *
+     * @default undefined (auto-detect)
+     */
+    readonly thinking?: boolean;
     /**
      * Default maximum output tokens for LLM responses.
      * Used if a CompletionRequest does not specify maxTokens.
@@ -1361,14 +1400,30 @@ declare const ComplexityAnalysisSchema: Schema.Struct<{
 }>;
 type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
+/**
+ * Default model constants for each LLM provider.
+ * Single source of truth — used by providers at construction time
+ * and by the runtime to resolve model names for display/metrics.
+ */
+declare const PROVIDER_DEFAULT_MODELS: Record<string, string>;
+/**
+ * Get the default model for a given provider.
+ * Returns undefined if the provider is not recognized.
+ */
+declare function getProviderDefaultModel(provider: string): string | undefined;
 /**
  * Create the LLM provider layer for a specific provider.
  * Uses env vars for configuration by default.
  */
-declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
+declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string, modelParams?: {
+    thinking?: boolean;
+    temperature?: number;
+    maxTokens?: number;
+}) => Layer.Layer<LLMService | PromptManager, never, never>;
 /**
  * LLM layer with custom config (for programmatic use).
  */
 declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
-export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
+export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };

package/dist/index.js (changed)

@@ -1376,7 +1376,9 @@ var CompletionResponseSchema = Schema.Struct({
   /** Actual model identifier used (may differ from request) */
   model: Schema.String,
   /** Tool calls emitted by the model (if any) */
-  toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
+  toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
+  /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
+  thinking: Schema.optional(Schema.String)
 });
 // src/errors.ts
@@ -1405,7 +1407,7 @@ var LLMConfig = class extends Context2.Tag("LLMConfig")() {
 };
 var llmConfigFromEnv = LLMConfig.of({
   defaultProvider: "anthropic",
-  defaultModel: process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514",
+  defaultModel: process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514",
   anthropicApiKey: process.env.ANTHROPIC_API_KEY,
   openaiApiKey: process.env.OPENAI_API_KEY,
   googleApiKey: process.env.GOOGLE_API_KEY,
@@ -1416,7 +1418,7 @@ var llmConfigFromEnv = LLMConfig.of({
     provider: process.env.EMBEDDING_PROVIDER ?? "openai",
     batchSize: 100
   },
-  supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514").startsWith("claude"),
+  supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514").startsWith("claude"),
   maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
   timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
   defaultMaxTokens: 4096,
@@ -1834,6 +1836,12 @@ No markdown, no code fences, just raw JSON.`
       getModelConfig: () => Effect4.succeed({
         provider: "anthropic",
         model: config.defaultModel
+      }),
+      getStructuredOutputCapabilities: () => Effect4.succeed({
+        nativeJsonMode: false,
+        jsonSchemaEnforcement: false,
+        prefillSupport: true,
+        grammarConstraints: false
       })
     });
   })
@@ -2121,6 +2129,12 @@ No markdown, no code fences, just raw JSON.`
       getModelConfig: () => Effect5.succeed({
         provider: "openai",
         model: defaultModel
+      }),
+      getStructuredOutputCapabilities: () => Effect5.succeed({
+        nativeJsonMode: true,
+        jsonSchemaEnforcement: true,
+        prefillSupport: false,
+        grammarConstraints: false
       })
     });
   })
@@ -2164,12 +2178,29 @@ var mapOpenAIResponse = (response, model) => {
 // src/providers/local.ts
 import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
+// src/provider-defaults.ts
+var PROVIDER_DEFAULT_MODELS = {
+  anthropic: "claude-sonnet-4-20250514",
+  openai: "gpt-4o",
+  ollama: "cogito:14b",
+  gemini: "gemini-2.0-flash",
+  litellm: "gpt-4o",
+  test: "test-model"
+};
+function getProviderDefaultModel(provider) {
+  return PROVIDER_DEFAULT_MODELS[provider];
+}
+// src/providers/local.ts
 var toOllamaMessages = (messages) => messages.map((m) => {
   if (m.role === "tool") {
     return { role: "tool", content: m.content };
   }
   if (m.role === "assistant") {
-    const textContent = typeof m.content === "string" ? m.content : m.content.filter((b) => b.type === "text").map((b) => b.text).join("");
+    const textContent = typeof m.content === "string" ? m.content : m.content.filter(
+      (b) => b.type === "text"
+    ).map((b) => b.text).join("");
     const toolUseBlocks = typeof m.content !== "string" ? m.content.filter(
       (b) => b.type === "tool_use"
     ) : [];
@@ -2189,7 +2220,9 @@ var toOllamaMessages = (messages) => messages.map((m) => {
   }
   return {
     role: m.role,
-    content: typeof m.content === "string" ? m.content : m.content.filter((b) => b.type === "text").map((b) => b.text).join("")
+    content: typeof m.content === "string" ? m.content : m.content.filter(
+      (b) => b.type === "text"
+    ).map((b) => b.text).join("")
   };
 });
 var toOllamaTools = (tools) => {
@@ -2211,12 +2244,50 @@ var parseToolCalls = (toolCalls) => {
     input: tc.function.arguments
   }));
 };
+var thinkingCapabilityCache = /* @__PURE__ */ new Map();
+async function supportsThinking(client, model) {
+  const cached = thinkingCapabilityCache.get(model);
+  if (cached !== void 0) return cached;
+  try {
+    const info = await client.show({ model });
+    const template = info.template ?? "";
+    const result = template.includes("think") || template.includes("<|thinking|>");
+    thinkingCapabilityCache.set(model, result);
+    return result;
+  } catch {
+    thinkingCapabilityCache.set(model, false);
+    return false;
+  }
+}
+async function resolveThinking(client, model, configThinking) {
+  if (configThinking === false) return void 0;
+  if (configThinking === true) return true;
+  const capable = await supportsThinking(client, model);
+  return capable ? true : void 0;
+}
+function ollamaError(error, model) {
+  const msg = error?.message ?? String(error);
+  const status = error?.status_code ?? error?.statusCode;
+  if (status === 404 || /model\s+['"]?\S+['"]?\s+not found/i.test(msg)) {
+    const modelName = model ?? msg.match(/model\s+['"]?(\S+?)['"]?\s+not found/i)?.[1] ?? "unknown";
+    return new LLMError({
+      message: `Model "${modelName}" not found locally. Run: ollama pull ${modelName}`,
+      provider: "ollama",
+      cause: error
+    });
+  }
+  return new LLMError({
+    message: `Ollama request failed: ${msg}`,
+    provider: "ollama",
+    cause: error
+  });
+}
 var LocalProviderLive = Layer5.effect(
   LLMService,
   Effect6.gen(function* () {
     const config = yield* LLMConfig;
     const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
-    const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? "llama3" : config.defaultModel;
+    const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? getProviderDefaultModel("ollama") ?? "cogito:14b" : config.defaultModel;
     const getClient = async () => {
       const { Ollama: Ollama3 } = await Promise.resolve().then(() => (init_dist(), dist_exports));
       return new Ollama3({ host: endpoint });
@@ -2231,11 +2302,17 @@ var LocalProviderLive = Layer5.effect(
             if (request.systemPrompt) {
               msgs.unshift({ role: "system", content: request.systemPrompt });
             }
+            const think = await resolveThinking(
+              client,
+              model,
+              config.thinking
+            );
             return client.chat({
               model,
               messages: msgs,
               tools: toOllamaTools(request.tools),
               stream: false,
+              ...think !== void 0 ? { think } : {},
               keep_alive: "5m",
               options: {
                 temperature: request.temperature ?? config.defaultTemperature,
@@ -2244,13 +2321,10 @@ var LocalProviderLive = Layer5.effect(
               }
             });
           },
-          catch: (error) => new LLMError({
-            message: `Ollama request failed: ${error}`,
-            provider: "ollama",
-            cause: error
-          })
+          catch: (error) => ollamaError(error, model)
         });
         const content = response.message?.content ?? "";
+        const thinkingContent = response.message?.thinking || void 0;
         const inputTokens = response.prompt_eval_count ?? 0;
         const outputTokens = response.eval_count ?? 0;
         const toolCalls = parseToolCalls(
@@ -2268,7 +2342,8 @@ var LocalProviderLive = Layer5.effect(
             // Local models are free
           },
           model: response.model ?? model,
-          toolCalls
+          toolCalls,
+          ...thinkingContent ? { thinking: thinkingContent } : {}
         };
       }).pipe(
         Effect6.retry(retryPolicy),
@@ -2292,13 +2367,22 @@ var LocalProviderLive = Layer5.effect(
               const client = await getClient();
               const msgs = toOllamaMessages(request.messages);
               if (request.systemPrompt) {
-                msgs.unshift({ role: "system", content: request.systemPrompt });
+                msgs.unshift({
+                  role: "system",
+                  content: request.systemPrompt
+                });
               }
+              const think = await resolveThinking(
+                client,
+                model,
+                config.thinking
+              );
               const stream = await client.chat({
                 model,
                 messages: msgs,
                 tools: toOllamaTools(request.tools),
                 stream: true,
+                ...think !== void 0 ? { think } : {},
                 keep_alive: "5m",
                 options: {
                   temperature: request.temperature ?? config.defaultTemperature,
@@ -2332,14 +2416,7 @@ var LocalProviderLive = Layer5.effect(
                 }
               }
             } catch (error) {
-              const err = error;
-              emit.fail(
-                new LLMError({
-                  message: err.message ?? String(error),
-                  provider: "ollama",
-                  cause: error
-                })
-              );
+              emit.fail(ollamaError(error, model));
             }
           };
           void doStream();
@@ -2404,18 +2481,14 @@ No markdown, no code fences, just raw JSON.`
                 }
               });
             },
-            catch: (error) => new LLMError({
-              message: `Ollama request failed: ${error}`,
-              provider: "ollama",
-              cause: error
-            })
+            catch: (error) => ollamaError(error, model)
           });
           const content = response.message?.content ?? "";
           try {
             const parsed = JSON.parse(content);
-            const decoded = Schema4.decodeUnknownEither(
-              request.outputSchema
-            )(parsed);
+            const decoded = Schema4.decodeUnknownEither(request.outputSchema)(
+              parsed
+            );
             if (decoded._tag === "Right") {
               return decoded.right;
             }
@@ -2442,11 +2515,10 @@ No markdown, no code fences, just raw JSON.`
           });
           return response.embeddings;
         },
-        catch: (error) => new LLMError({
-          message: `Embedding failed: ${error}`,
-          provider: "ollama",
-          cause: error
-        })
+        catch: (error) => ollamaError(
+          error,
+          model ?? config.embeddingConfig.model ?? "nomic-embed-text"
+        )
       }),
       countTokens: (messages) => Effect6.gen(function* () {
         return yield* estimateTokenCount(messages);
@@ -2454,6 +2526,12 @@ No markdown, no code fences, just raw JSON.`
       getModelConfig: () => Effect6.succeed({
         provider: "ollama",
         model: defaultModel
+      }),
+      getStructuredOutputCapabilities: () => Effect6.succeed({
+        nativeJsonMode: true,
+        jsonSchemaEnforcement: false,
+        prefillSupport: false,
+        grammarConstraints: true
       })
     });
   })
@@ -2770,6 +2848,12 @@ No markdown, no code fences, just raw JSON.`
       getModelConfig: () => Effect7.succeed({
         provider: "gemini",
         model: config.defaultModel
+      }),
+      getStructuredOutputCapabilities: () => Effect7.succeed({
+        nativeJsonMode: true,
+        jsonSchemaEnforcement: false,
+        prefillSupport: false,
+        grammarConstraints: false
       })
     });
   })
@@ -3109,6 +3193,12 @@ No markdown, no code fences, just raw JSON.`
       getModelConfig: () => Effect8.succeed({
         provider: "litellm",
         model: defaultModel
+      }),
+      getStructuredOutputCapabilities: () => Effect8.succeed({
+        nativeJsonMode: false,
+        jsonSchemaEnforcement: false,
+        prefillSupport: false,
+        grammarConstraints: false
       })
     });
   })
@@ -3149,25 +3239,42 @@ var TestLLMService = (responses) => ({
       model: "test-model"
     };
   }),
-  stream: (_request) => Effect9.succeed(
-    Stream6.make(
-      { type: "text_delta", text: "Test " },
-      { type: "text_delta", text: "response" },
-      {
-        type: "content_complete",
-        content: "Test response"
-      },
-      {
-        type: "usage",
-        usage: {
-          inputTokens: 0,
-          outputTokens: 0,
-          totalTokens: 0,
-          estimatedCost: 0
-        }
+  stream: (request) => {
+    const lastMessage = request.messages[request.messages.length - 1];
+    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
+    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
+    const searchText = `${content} ${systemPrompt}`;
+    let matchedResponse = "Test response";
+    for (const [pattern, response] of Object.entries(responses)) {
+      if (pattern.length > 0 && searchText.includes(pattern)) {
+        matchedResponse = response;
+        break;
       }
-    )
-  ),
+    }
+    const inputTokens = Math.ceil(content.length / 4);
+    const outputTokens = Math.ceil(matchedResponse.length / 4);
+    return Effect9.succeed(
+      Stream6.make(
+        {
+          type: "text_delta",
+          text: matchedResponse
+        },
+        {
+          type: "content_complete",
+          content: matchedResponse
+        },
+        {
+          type: "usage",
+          usage: {
+            inputTokens,
+            outputTokens,
+            totalTokens: inputTokens + outputTokens,
+            estimatedCost: 0
+          }
+        }
+      )
+    );
+  },
   completeStructured: (request) => Effect9.gen(function* () {
     const lastMessage = request.messages[request.messages.length - 1];
     const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
@@ -3193,6 +3300,12 @@ var TestLLMService = (responses) => ({
   getModelConfig: () => Effect9.succeed({
     provider: "anthropic",
     model: "test-model"
+  }),
+  getStructuredOutputCapabilities: () => Effect9.succeed({
+    nativeJsonMode: true,
+    jsonSchemaEnforcement: false,
+    prefillSupport: false,
+    grammarConstraints: false
   })
 });
 var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
@@ -3263,14 +3376,19 @@ var ComplexityAnalysisSchema = Schema8.Struct({
 // src/runtime.ts
 import { Layer as Layer9 } from "effect";
-var createLLMProviderLayer = (provider = "anthropic", testResponses, model) => {
+var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams) => {
   if (provider === "test") {
     return Layer9.mergeAll(
       TestLLMServiceLayer(testResponses ?? {}),
       PromptManagerLive
     );
   }
-  const configLayer = model ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, defaultModel: model })) : LLMConfigFromEnv;
+  const configOverrides = {};
+  if (model) configOverrides.defaultModel = model;
+  if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
+  if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
+  if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
+  const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
   const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
   return Layer9.mergeAll(
     providerLayer.pipe(Layer9.provide(configLayer)),
@@ -3309,6 +3427,7 @@ export {
   ModelConfigSchema,
   ModelPresets,
   OpenAIProviderLive,
+  PROVIDER_DEFAULT_MODELS,
   PlanSchema,
   PromptManager,
   PromptManagerLive,
@@ -3329,6 +3448,7 @@ export {
   createLLMProviderLayer,
   createLLMProviderLayerWithConfig,
   estimateTokenCount,
+  getProviderDefaultModel,
   llmConfigFromEnv,
   makeCacheable,
   retryPolicy