@reactive-agents/llm-provider 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -673,6 +673,8 @@ declare const CompletionResponseSchema: Schema.Struct<{
673
673
  /** Tool input parameters (arbitrary JSON-compatible object) */
674
674
  input: typeof Schema.Unknown;
675
675
  }>>>;
676
+ /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
677
+ thinking: Schema.optional<typeof Schema.String>;
676
678
  }>;
677
679
  /**
678
680
  * LLM response to a completion request.
@@ -804,6 +806,20 @@ type ObservabilityVerbosity =
804
806
  "metadata"
805
807
  /** Capture complete request/response payloads — higher overhead, useful for debugging. */
806
808
  | "full";
809
+ /**
810
+ * Provider-reported capabilities for structured JSON output.
811
+ * Used by the structured output pipeline to select the optimal extraction strategy.
812
+ */
813
+ type StructuredOutputCapabilities = {
814
+ /** Provider supports forcing JSON-only output (OpenAI, Gemini, Ollama) */
815
+ readonly nativeJsonMode: boolean;
816
+ /** Provider can enforce a JSON Schema on the output (OpenAI structured outputs) */
817
+ readonly jsonSchemaEnforcement: boolean;
818
+ /** Provider supports assistant message prefill to start response with "{" (Anthropic) */
819
+ readonly prefillSupport: boolean;
820
+ /** Provider supports GBNF grammar constraints for exact schema matching (Ollama/llama.cpp) */
821
+ readonly grammarConstraints: boolean;
822
+ };
807
823
 
808
824
  declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
809
825
  readonly _tag: "LLMError";
@@ -904,6 +920,11 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
904
920
  * Get current model configuration.
905
921
  */
906
922
  readonly getModelConfig: () => Effect.Effect<ModelConfig, never>;
923
+ /**
924
+ * Report structured output capabilities for this provider.
925
+ * Used by the structured output pipeline to select optimal JSON extraction strategy.
926
+ */
927
+ readonly getStructuredOutputCapabilities: () => Effect.Effect<StructuredOutputCapabilities, never>;
907
928
  }>;
908
929
  /**
909
930
  * Core LLM service — all LLM interactions go through this.
@@ -992,6 +1013,15 @@ declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
992
1013
  * @default 30000 (30 seconds)
993
1014
  */
994
1015
  readonly timeoutMs: number;
1016
+ /**
1017
+ * Enable/disable thinking mode for thinking-capable models.
1018
+ * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
1019
+ * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
1020
+ * - `undefined` — Auto-detect based on model capabilities (Ollama only)
1021
+ *
1022
+ * @default undefined (auto-detect)
1023
+ */
1024
+ readonly thinking?: boolean;
995
1025
  /**
996
1026
  * Default maximum output tokens for LLM responses.
997
1027
  * Used if a CompletionRequest does not specify maxTokens.
@@ -1155,6 +1185,15 @@ declare const llmConfigFromEnv: {
1155
1185
  * @default 30000 (30 seconds)
1156
1186
  */
1157
1187
  readonly timeoutMs: number;
1188
+ /**
1189
+ * Enable/disable thinking mode for thinking-capable models.
1190
+ * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
1191
+ * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
1192
+ * - `undefined` — Auto-detect based on model capabilities (Ollama only)
1193
+ *
1194
+ * @default undefined (auto-detect)
1195
+ */
1196
+ readonly thinking?: boolean;
1158
1197
  /**
1159
1198
  * Default maximum output tokens for LLM responses.
1160
1199
  * Used if a CompletionRequest does not specify maxTokens.
@@ -1361,14 +1400,30 @@ declare const ComplexityAnalysisSchema: Schema.Struct<{
1361
1400
  }>;
1362
1401
  type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
1363
1402
 
1403
+ /**
1404
+ * Default model constants for each LLM provider.
1405
+ * Single source of truth — used by providers at construction time
1406
+ * and by the runtime to resolve model names for display/metrics.
1407
+ */
1408
+ declare const PROVIDER_DEFAULT_MODELS: Record<string, string>;
1409
+ /**
1410
+ * Get the default model for a given provider.
1411
+ * Returns undefined if the provider is not recognized.
1412
+ */
1413
+ declare function getProviderDefaultModel(provider: string): string | undefined;
1414
+
1364
1415
  /**
1365
1416
  * Create the LLM provider layer for a specific provider.
1366
1417
  * Uses env vars for configuration by default.
1367
1418
  */
1368
- declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
1419
+ declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string, modelParams?: {
1420
+ thinking?: boolean;
1421
+ temperature?: number;
1422
+ maxTokens?: number;
1423
+ }) => Layer.Layer<LLMService | PromptManager, never, never>;
1369
1424
  /**
1370
1425
  * LLM layer with custom config (for programmatic use).
1371
1426
  */
1372
1427
  declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
1373
1428
 
1374
- export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
1429
+ export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };
package/dist/index.js CHANGED
@@ -1376,7 +1376,9 @@ var CompletionResponseSchema = Schema.Struct({
1376
1376
  /** Actual model identifier used (may differ from request) */
1377
1377
  model: Schema.String,
1378
1378
  /** Tool calls emitted by the model (if any) */
1379
- toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
1379
+ toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
1380
+ /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
1381
+ thinking: Schema.optional(Schema.String)
1380
1382
  });
1381
1383
 
1382
1384
  // src/errors.ts
@@ -1405,7 +1407,7 @@ var LLMConfig = class extends Context2.Tag("LLMConfig")() {
1405
1407
  };
1406
1408
  var llmConfigFromEnv = LLMConfig.of({
1407
1409
  defaultProvider: "anthropic",
1408
- defaultModel: process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514",
1410
+ defaultModel: process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514",
1409
1411
  anthropicApiKey: process.env.ANTHROPIC_API_KEY,
1410
1412
  openaiApiKey: process.env.OPENAI_API_KEY,
1411
1413
  googleApiKey: process.env.GOOGLE_API_KEY,
@@ -1416,7 +1418,7 @@ var llmConfigFromEnv = LLMConfig.of({
1416
1418
  provider: process.env.EMBEDDING_PROVIDER ?? "openai",
1417
1419
  batchSize: 100
1418
1420
  },
1419
- supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514").startsWith("claude"),
1421
+ supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514").startsWith("claude"),
1420
1422
  maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
1421
1423
  timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
1422
1424
  defaultMaxTokens: 4096,
@@ -1834,6 +1836,12 @@ No markdown, no code fences, just raw JSON.`
1834
1836
  getModelConfig: () => Effect4.succeed({
1835
1837
  provider: "anthropic",
1836
1838
  model: config.defaultModel
1839
+ }),
1840
+ getStructuredOutputCapabilities: () => Effect4.succeed({
1841
+ nativeJsonMode: false,
1842
+ jsonSchemaEnforcement: false,
1843
+ prefillSupport: true,
1844
+ grammarConstraints: false
1837
1845
  })
1838
1846
  });
1839
1847
  })
@@ -2121,6 +2129,12 @@ No markdown, no code fences, just raw JSON.`
2121
2129
  getModelConfig: () => Effect5.succeed({
2122
2130
  provider: "openai",
2123
2131
  model: defaultModel
2132
+ }),
2133
+ getStructuredOutputCapabilities: () => Effect5.succeed({
2134
+ nativeJsonMode: true,
2135
+ jsonSchemaEnforcement: true,
2136
+ prefillSupport: false,
2137
+ grammarConstraints: false
2124
2138
  })
2125
2139
  });
2126
2140
  })
@@ -2164,12 +2178,29 @@ var mapOpenAIResponse = (response, model) => {
2164
2178
 
2165
2179
  // src/providers/local.ts
2166
2180
  import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
2181
+
2182
+ // src/provider-defaults.ts
/**
 * Default model identifier for each built-in LLM provider, keyed by provider name.
 * Consulted by `getProviderDefaultModel`, which the Ollama provider uses to pick
 * its fallback model.
 */
var PROVIDER_DEFAULT_MODELS = {
  anthropic: "claude-sonnet-4-20250514",
  openai: "gpt-4o",
  ollama: "cogito:14b",
  gemini: "gemini-2.0-flash",
  litellm: "gpt-4o",
  test: "test-model"
};
/**
 * Get the default model for a given provider.
 *
 * @param provider Provider name, e.g. "anthropic" or "ollama".
 * @returns The default model string, or `undefined` when the provider is not
 *          one of the keys of `PROVIDER_DEFAULT_MODELS`.
 */
function getProviderDefaultModel(provider) {
  // Only own keys count: a bare index lookup would hand back inherited
  // Object.prototype members (a function for "constructor"/"toString", an
  // object for "__proto__") instead of `undefined` for unknown providers.
  const known = Object.prototype.hasOwnProperty.call(PROVIDER_DEFAULT_MODELS, provider);
  return known ? PROVIDER_DEFAULT_MODELS[provider] : void 0;
}
2194
+
2195
+ // src/providers/local.ts
2167
2196
  var toOllamaMessages = (messages) => messages.map((m) => {
2168
2197
  if (m.role === "tool") {
2169
2198
  return { role: "tool", content: m.content };
2170
2199
  }
2171
2200
  if (m.role === "assistant") {
2172
- const textContent = typeof m.content === "string" ? m.content : m.content.filter((b) => b.type === "text").map((b) => b.text).join("");
2201
+ const textContent = typeof m.content === "string" ? m.content : m.content.filter(
2202
+ (b) => b.type === "text"
2203
+ ).map((b) => b.text).join("");
2173
2204
  const toolUseBlocks = typeof m.content !== "string" ? m.content.filter(
2174
2205
  (b) => b.type === "tool_use"
2175
2206
  ) : [];
@@ -2189,7 +2220,9 @@ var toOllamaMessages = (messages) => messages.map((m) => {
2189
2220
  }
2190
2221
  return {
2191
2222
  role: m.role,
2192
- content: typeof m.content === "string" ? m.content : m.content.filter((b) => b.type === "text").map((b) => b.text).join("")
2223
+ content: typeof m.content === "string" ? m.content : m.content.filter(
2224
+ (b) => b.type === "text"
2225
+ ).map((b) => b.text).join("")
2193
2226
  };
2194
2227
  });
2195
2228
  var toOllamaTools = (tools) => {
@@ -2211,12 +2244,50 @@ var parseToolCalls = (toolCalls) => {
2211
2244
  input: tc.function.arguments
2212
2245
  }));
2213
2246
  };
/** Memo of thinking-capability lookups, keyed by model name (module-level, shared by all callers). */
var thinkingCapabilityCache = /* @__PURE__ */ new Map();
/**
 * Heuristically detect whether a model supports thinking mode.
 *
 * Calls `client.show({ model })` and reports `true` when the returned chat
 * template mentions "think" (this also covers markers such as `<think>` and
 * `<|thinking|>`). Successful lookups are memoised per model name.
 *
 * @param client Object exposing an async `show({ model })` method.
 * @param model  Model name to inspect.
 * @returns `true` if the template mentions thinking, otherwise `false`.
 *          A failed lookup also yields `false`; this function never rejects.
 */
async function supportsThinking(client, model) {
  const cached = thinkingCapabilityCache.get(model);
  if (cached !== void 0) return cached;
  try {
    const info = await client.show({ model });
    const template = info.template ?? "";
    const result = template.includes("think");
    thinkingCapabilityCache.set(model, result);
    return result;
  } catch {
    // Best effort: treat the model as non-thinking for this call, but do not
    // memoise the failure. Caching `false` here would let a single transient
    // error (server still starting, model mid-pull) disable auto-detected
    // thinking for the rest of the process.
    return false;
  }
}
/**
 * Decide what value, if any, to use for the chat request's `think` option.
 *
 * - `configThinking === true`  → `true`, without probing the model.
 * - `configThinking === false` → `undefined`, without probing the model.
 * - anything else              → probe via `supportsThinking`; `true` when the
 *   model looks thinking-capable, otherwise `undefined`.
 *
 * The result is never `false`: it is either `true` or `undefined`.
 *
 * @param client         Client handed through to `supportsThinking`.
 * @param model          Model name handed through to `supportsThinking`.
 * @param configThinking Explicit setting from configuration, if any.
 */
async function resolveThinking(client, model, configThinking) {
  switch (configThinking) {
    case true:
      return true;
    case false:
      return void 0;
    default: {
      const detected = await supportsThinking(client, model);
      if (detected) return true;
      return void 0;
    }
  }
}
/**
 * Wrap a failure from the Ollama client in an `LLMError` tagged with provider "ollama".
 *
 * A status of 404 (read from `status_code` or `statusCode`), or a message of the
 * form `model <name> not found`, is reported as a missing local model together
 * with the `ollama pull` command to run. Everything else becomes a generic
 * "Ollama request failed" error. The original error is always kept as `cause`.
 *
 * @param error Value thrown by the client (an Error or anything else).
 * @param model Model name involved in the request, if known. When absent, the
 *              name is taken from the error message, falling back to "unknown".
 * @returns A new `LLMError`.
 */
function ollamaError(error, model) {
  const detail = error?.message ?? String(error);
  const httpStatus = error?.status_code ?? error?.statusCode;
  const modelMissing = httpStatus === 404 || /model\s+['"]?\S+['"]?\s+not found/i.test(detail);
  let message;
  if (modelMissing) {
    // Prefer the caller-supplied name; only parse the message when none was given.
    const name = model ?? detail.match(/model\s+['"]?(\S+?)['"]?\s+not found/i)?.[1] ?? "unknown";
    message = `Model "${name}" not found locally. Run: ollama pull ${name}`;
  } else {
    message = `Ollama request failed: ${detail}`;
  }
  return new LLMError({
    message,
    provider: "ollama",
    cause: error
  });
}
2214
2285
  var LocalProviderLive = Layer5.effect(
2215
2286
  LLMService,
2216
2287
  Effect6.gen(function* () {
2217
2288
  const config = yield* LLMConfig;
2218
2289
  const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
2219
- const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? "llama3" : config.defaultModel;
2290
+ const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? getProviderDefaultModel("ollama") ?? "cogito:14b" : config.defaultModel;
2220
2291
  const getClient = async () => {
2221
2292
  const { Ollama: Ollama3 } = await Promise.resolve().then(() => (init_dist(), dist_exports));
2222
2293
  return new Ollama3({ host: endpoint });
@@ -2231,11 +2302,17 @@ var LocalProviderLive = Layer5.effect(
2231
2302
  if (request.systemPrompt) {
2232
2303
  msgs.unshift({ role: "system", content: request.systemPrompt });
2233
2304
  }
2305
+ const think = await resolveThinking(
2306
+ client,
2307
+ model,
2308
+ config.thinking
2309
+ );
2234
2310
  return client.chat({
2235
2311
  model,
2236
2312
  messages: msgs,
2237
2313
  tools: toOllamaTools(request.tools),
2238
2314
  stream: false,
2315
+ ...think !== void 0 ? { think } : {},
2239
2316
  keep_alive: "5m",
2240
2317
  options: {
2241
2318
  temperature: request.temperature ?? config.defaultTemperature,
@@ -2244,13 +2321,10 @@ var LocalProviderLive = Layer5.effect(
2244
2321
  }
2245
2322
  });
2246
2323
  },
2247
- catch: (error) => new LLMError({
2248
- message: `Ollama request failed: ${error}`,
2249
- provider: "ollama",
2250
- cause: error
2251
- })
2324
+ catch: (error) => ollamaError(error, model)
2252
2325
  });
2253
2326
  const content = response.message?.content ?? "";
2327
+ const thinkingContent = response.message?.thinking || void 0;
2254
2328
  const inputTokens = response.prompt_eval_count ?? 0;
2255
2329
  const outputTokens = response.eval_count ?? 0;
2256
2330
  const toolCalls = parseToolCalls(
@@ -2268,7 +2342,8 @@ var LocalProviderLive = Layer5.effect(
2268
2342
  // Local models are free
2269
2343
  },
2270
2344
  model: response.model ?? model,
2271
- toolCalls
2345
+ toolCalls,
2346
+ ...thinkingContent ? { thinking: thinkingContent } : {}
2272
2347
  };
2273
2348
  }).pipe(
2274
2349
  Effect6.retry(retryPolicy),
@@ -2292,13 +2367,22 @@ var LocalProviderLive = Layer5.effect(
2292
2367
  const client = await getClient();
2293
2368
  const msgs = toOllamaMessages(request.messages);
2294
2369
  if (request.systemPrompt) {
2295
- msgs.unshift({ role: "system", content: request.systemPrompt });
2370
+ msgs.unshift({
2371
+ role: "system",
2372
+ content: request.systemPrompt
2373
+ });
2296
2374
  }
2375
+ const think = await resolveThinking(
2376
+ client,
2377
+ model,
2378
+ config.thinking
2379
+ );
2297
2380
  const stream = await client.chat({
2298
2381
  model,
2299
2382
  messages: msgs,
2300
2383
  tools: toOllamaTools(request.tools),
2301
2384
  stream: true,
2385
+ ...think !== void 0 ? { think } : {},
2302
2386
  keep_alive: "5m",
2303
2387
  options: {
2304
2388
  temperature: request.temperature ?? config.defaultTemperature,
@@ -2332,14 +2416,7 @@ var LocalProviderLive = Layer5.effect(
2332
2416
  }
2333
2417
  }
2334
2418
  } catch (error) {
2335
- const err = error;
2336
- emit.fail(
2337
- new LLMError({
2338
- message: err.message ?? String(error),
2339
- provider: "ollama",
2340
- cause: error
2341
- })
2342
- );
2419
+ emit.fail(ollamaError(error, model));
2343
2420
  }
2344
2421
  };
2345
2422
  void doStream();
@@ -2404,18 +2481,14 @@ No markdown, no code fences, just raw JSON.`
2404
2481
  }
2405
2482
  });
2406
2483
  },
2407
- catch: (error) => new LLMError({
2408
- message: `Ollama request failed: ${error}`,
2409
- provider: "ollama",
2410
- cause: error
2411
- })
2484
+ catch: (error) => ollamaError(error, model)
2412
2485
  });
2413
2486
  const content = response.message?.content ?? "";
2414
2487
  try {
2415
2488
  const parsed = JSON.parse(content);
2416
- const decoded = Schema4.decodeUnknownEither(
2417
- request.outputSchema
2418
- )(parsed);
2489
+ const decoded = Schema4.decodeUnknownEither(request.outputSchema)(
2490
+ parsed
2491
+ );
2419
2492
  if (decoded._tag === "Right") {
2420
2493
  return decoded.right;
2421
2494
  }
@@ -2442,11 +2515,10 @@ No markdown, no code fences, just raw JSON.`
2442
2515
  });
2443
2516
  return response.embeddings;
2444
2517
  },
2445
- catch: (error) => new LLMError({
2446
- message: `Embedding failed: ${error}`,
2447
- provider: "ollama",
2448
- cause: error
2449
- })
2518
+ catch: (error) => ollamaError(
2519
+ error,
2520
+ model ?? config.embeddingConfig.model ?? "nomic-embed-text"
2521
+ )
2450
2522
  }),
2451
2523
  countTokens: (messages) => Effect6.gen(function* () {
2452
2524
  return yield* estimateTokenCount(messages);
@@ -2454,6 +2526,12 @@ No markdown, no code fences, just raw JSON.`
2454
2526
  getModelConfig: () => Effect6.succeed({
2455
2527
  provider: "ollama",
2456
2528
  model: defaultModel
2529
+ }),
2530
+ getStructuredOutputCapabilities: () => Effect6.succeed({
2531
+ nativeJsonMode: true,
2532
+ jsonSchemaEnforcement: false,
2533
+ prefillSupport: false,
2534
+ grammarConstraints: true
2457
2535
  })
2458
2536
  });
2459
2537
  })
@@ -2770,6 +2848,12 @@ No markdown, no code fences, just raw JSON.`
2770
2848
  getModelConfig: () => Effect7.succeed({
2771
2849
  provider: "gemini",
2772
2850
  model: config.defaultModel
2851
+ }),
2852
+ getStructuredOutputCapabilities: () => Effect7.succeed({
2853
+ nativeJsonMode: true,
2854
+ jsonSchemaEnforcement: false,
2855
+ prefillSupport: false,
2856
+ grammarConstraints: false
2773
2857
  })
2774
2858
  });
2775
2859
  })
@@ -3109,6 +3193,12 @@ No markdown, no code fences, just raw JSON.`
3109
3193
  getModelConfig: () => Effect8.succeed({
3110
3194
  provider: "litellm",
3111
3195
  model: defaultModel
3196
+ }),
3197
+ getStructuredOutputCapabilities: () => Effect8.succeed({
3198
+ nativeJsonMode: false,
3199
+ jsonSchemaEnforcement: false,
3200
+ prefillSupport: false,
3201
+ grammarConstraints: false
3112
3202
  })
3113
3203
  });
3114
3204
  })
@@ -3149,25 +3239,42 @@ var TestLLMService = (responses) => ({
3149
3239
  model: "test-model"
3150
3240
  };
3151
3241
  }),
3152
- stream: (_request) => Effect9.succeed(
3153
- Stream6.make(
3154
- { type: "text_delta", text: "Test " },
3155
- { type: "text_delta", text: "response" },
3156
- {
3157
- type: "content_complete",
3158
- content: "Test response"
3159
- },
3160
- {
3161
- type: "usage",
3162
- usage: {
3163
- inputTokens: 0,
3164
- outputTokens: 0,
3165
- totalTokens: 0,
3166
- estimatedCost: 0
3167
- }
3242
+ stream: (request) => {
3243
+ const lastMessage = request.messages[request.messages.length - 1];
3244
+ const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
3245
+ const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
3246
+ const searchText = `${content} ${systemPrompt}`;
3247
+ let matchedResponse = "Test response";
3248
+ for (const [pattern, response] of Object.entries(responses)) {
3249
+ if (pattern.length > 0 && searchText.includes(pattern)) {
3250
+ matchedResponse = response;
3251
+ break;
3168
3252
  }
3169
- )
3170
- ),
3253
+ }
3254
+ const inputTokens = Math.ceil(content.length / 4);
3255
+ const outputTokens = Math.ceil(matchedResponse.length / 4);
3256
+ return Effect9.succeed(
3257
+ Stream6.make(
3258
+ {
3259
+ type: "text_delta",
3260
+ text: matchedResponse
3261
+ },
3262
+ {
3263
+ type: "content_complete",
3264
+ content: matchedResponse
3265
+ },
3266
+ {
3267
+ type: "usage",
3268
+ usage: {
3269
+ inputTokens,
3270
+ outputTokens,
3271
+ totalTokens: inputTokens + outputTokens,
3272
+ estimatedCost: 0
3273
+ }
3274
+ }
3275
+ )
3276
+ );
3277
+ },
3171
3278
  completeStructured: (request) => Effect9.gen(function* () {
3172
3279
  const lastMessage = request.messages[request.messages.length - 1];
3173
3280
  const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
@@ -3193,6 +3300,12 @@ var TestLLMService = (responses) => ({
3193
3300
  getModelConfig: () => Effect9.succeed({
3194
3301
  provider: "anthropic",
3195
3302
  model: "test-model"
3303
+ }),
3304
+ getStructuredOutputCapabilities: () => Effect9.succeed({
3305
+ nativeJsonMode: true,
3306
+ jsonSchemaEnforcement: false,
3307
+ prefillSupport: false,
3308
+ grammarConstraints: false
3196
3309
  })
3197
3310
  });
3198
3311
  var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
@@ -3263,14 +3376,19 @@ var ComplexityAnalysisSchema = Schema8.Struct({
3263
3376
 
3264
3377
  // src/runtime.ts
3265
3378
  import { Layer as Layer9 } from "effect";
3266
- var createLLMProviderLayer = (provider = "anthropic", testResponses, model) => {
3379
+ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams) => {
3267
3380
  if (provider === "test") {
3268
3381
  return Layer9.mergeAll(
3269
3382
  TestLLMServiceLayer(testResponses ?? {}),
3270
3383
  PromptManagerLive
3271
3384
  );
3272
3385
  }
3273
- const configLayer = model ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, defaultModel: model })) : LLMConfigFromEnv;
3386
+ const configOverrides = {};
3387
+ if (model) configOverrides.defaultModel = model;
3388
+ if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
3389
+ if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
3390
+ if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
3391
+ const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
3274
3392
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
3275
3393
  return Layer9.mergeAll(
3276
3394
  providerLayer.pipe(Layer9.provide(configLayer)),
@@ -3309,6 +3427,7 @@ export {
3309
3427
  ModelConfigSchema,
3310
3428
  ModelPresets,
3311
3429
  OpenAIProviderLive,
3430
+ PROVIDER_DEFAULT_MODELS,
3312
3431
  PlanSchema,
3313
3432
  PromptManager,
3314
3433
  PromptManagerLive,
@@ -3329,6 +3448,7 @@ export {
3329
3448
  createLLMProviderLayer,
3330
3449
  createLLMProviderLayerWithConfig,
3331
3450
  estimateTokenCount,
3451
+ getProviderDefaultModel,
3332
3452
  llmConfigFromEnv,
3333
3453
  makeCacheable,
3334
3454
  retryPolicy