@reactive-agents/llm-provider 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -3,243 +3,823 @@ import * as effect_Cause from 'effect/Cause';
3
3
  import * as effect_Types from 'effect/Types';
4
4
  import * as effect_Duration from 'effect/Duration';
5
5
 
6
- declare const LLMProviderType: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "custom"]>;
6
+ /**
7
+ * Schema for LLM provider selection.
8
+ * Supported providers: anthropic, openai, ollama, gemini, litellm, custom.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * const provider: LLMProvider = "anthropic";
13
+ * ```
14
+ */
15
+ declare const LLMProviderType: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
16
+ /**
17
+ * Union of supported LLM provider names.
18
+ * - "anthropic": Claude models via Anthropic API
19
+ * - "openai": GPT models via OpenAI API
20
+ * - "ollama": Local models via Ollama
21
+ * - "gemini": Google Gemini models
22
+ * - "litellm": LiteLLM proxy (40+ model providers)
23
+ * - "custom": User-defined provider adapter
24
+ */
7
25
  type LLMProvider = Schema.Schema.Type<typeof LLMProviderType>;
26
+ /**
27
+ * Schema for embedding model configuration.
28
+ * Embeddings are used for semantic caching, memory similarity search, and verification.
29
+ * Anthropic provides no embeddings API; embeddings always route to OpenAI or Ollama.
30
+ *
31
+ * @example
32
+ * ```typescript
33
+ * const config: EmbeddingConfig = {
34
+ * model: "text-embedding-3-small",
35
+ * dimensions: 1536,
36
+ * provider: "openai",
37
+ * batchSize: 100
38
+ * };
39
+ * ```
40
+ */
8
41
  declare const EmbeddingConfigSchema: Schema.Struct<{
42
+ /** Embedding model name (e.g., "text-embedding-3-small") */
9
43
  model: typeof Schema.String;
44
+ /** Output embedding vector dimensionality */
10
45
  dimensions: typeof Schema.Number;
46
+ /** Provider hosting the embedding model */
11
47
  provider: Schema.Literal<["openai", "ollama"]>;
48
+ /** Maximum vectors to embed in a single API call (default: 100) */
12
49
  batchSize: Schema.optional<typeof Schema.Number>;
13
50
  }>;
51
+ /**
52
+ * Embedding configuration type.
53
+ * Specifies the embedding model and provider for semantic operations.
54
+ */
14
55
  type EmbeddingConfig = Schema.Schema.Type<typeof EmbeddingConfigSchema>;
56
+ /**
57
+ * Default embedding configuration.
58
+ * Uses OpenAI's text-embedding-3-small with 1536 dimensions.
59
+ *
60
+ * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
61
+ */
15
62
  declare const DefaultEmbeddingConfig: EmbeddingConfig;
63
+ /**
64
+ * Schema for LLM model configuration options.
65
+ * Includes provider, model name, and optional sampling/output parameters.
66
+ *
67
+ * @example
68
+ * ```typescript
69
+ * const config: ModelConfig = {
70
+ * provider: "anthropic",
71
+ * model: "claude-opus-4-20250514",
72
+ * maxTokens: 4096,
73
+ * temperature: 0.7
74
+ * };
75
+ * ```
76
+ */
16
77
  declare const ModelConfigSchema: Schema.Struct<{
17
- provider: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "custom"]>;
78
+ /** LLM provider identifier */
79
+ provider: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
80
+ /** Model name/identifier for the provider */
18
81
  model: typeof Schema.String;
82
+ /** Maximum tokens in response (optional) */
19
83
  maxTokens: Schema.optional<typeof Schema.Number>;
84
+ /** Sampling temperature 0.0-1.0 (optional) */
20
85
  temperature: Schema.optional<typeof Schema.Number>;
86
+ /** Top-p (nucleus) sampling probability (optional) */
21
87
  topP: Schema.optional<typeof Schema.Number>;
88
+ /** Stop sequences to halt generation (optional) */
22
89
  stopSequences: Schema.optional<Schema.Array$<typeof Schema.String>>;
23
90
  }>;
91
+ /**
92
+ * LLM model configuration type.
93
+ * Specifies which LLM to use and how to configure its behavior.
94
+ */
24
95
  type ModelConfig = Schema.Schema.Type<typeof ModelConfigSchema>;
96
+ /**
97
+ * Pre-configured model profiles for popular LLMs.
98
+ * Each preset includes cost estimates, context window, and quality tiers.
99
+ * Quality tier: 0.0 (low) to 1.0 (highest).
100
+ * Cost: per 1 million input/output tokens in USD.
101
+ *
102
+ * @example
103
+ * ```typescript
104
+ * const preset = ModelPresets["claude-opus"];
105
+ * // { provider: "anthropic", model: "claude-opus-4-20250514", costPer1MInput: 15.0, ... }
106
+ * ```
107
+ */
25
108
  declare const ModelPresets: {
109
+ /**
110
+ * Claude 3.5 Haiku — fast, cost-effective Anthropic model.
111
+ * Best for low-latency, simple reasoning tasks; not recommended for complex analysis.
112
+ */
26
113
  readonly "claude-haiku": {
27
114
  readonly provider: "anthropic";
28
115
  readonly model: "claude-3-5-haiku-20241022";
116
+ /** Cost per 1 million input tokens in USD */
29
117
  readonly costPer1MInput: 1;
118
+ /** Cost per 1 million output tokens in USD */
30
119
  readonly costPer1MOutput: 5;
120
+ /** Maximum context window in tokens */
31
121
  readonly maxContext: 200000;
122
+ /** Quality tier (0.6 = reliable for simple tasks) */
32
123
  readonly quality: 0.6;
33
124
  };
125
+ /**
126
+ * Claude Sonnet 4 — balanced Anthropic model.
127
+ * Recommended for general-purpose reasoning, tool use, and production agents.
128
+ */
34
129
  readonly "claude-sonnet": {
35
130
  readonly provider: "anthropic";
36
131
  readonly model: "claude-sonnet-4-20250514";
37
132
  readonly costPer1MInput: 3;
38
133
  readonly costPer1MOutput: 15;
39
134
  readonly maxContext: 200000;
135
+ /** Quality tier (0.85 = excellent reasoning) */
40
136
  readonly quality: 0.85;
41
137
  };
138
+ /**
139
+ * Claude Sonnet 4.5 — latest Anthropic model.
140
+ * Superior reasoning over Sonnet 4; recommended for complex multi-step reasoning.
141
+ */
42
142
  readonly "claude-sonnet-4-5": {
43
143
  readonly provider: "anthropic";
44
144
  readonly model: "claude-sonnet-4-5-20250929";
45
145
  readonly costPer1MInput: 3;
46
146
  readonly costPer1MOutput: 15;
47
147
  readonly maxContext: 200000;
148
+ /** Quality tier (0.9 = very strong reasoning) */
48
149
  readonly quality: 0.9;
49
150
  };
151
+ /**
152
+ * Claude Opus 4 — most capable Anthropic model.
153
+ * Best for complex analysis, research, and high-accuracy multi-hop reasoning.
154
+ * Largest context window (1M tokens); highest cost.
155
+ */
50
156
  readonly "claude-opus": {
51
157
  readonly provider: "anthropic";
52
158
  readonly model: "claude-opus-4-20250514";
53
159
  readonly costPer1MInput: 15;
54
160
  readonly costPer1MOutput: 75;
55
161
  readonly maxContext: 1000000;
162
+ /** Quality tier (1.0 = frontier-class reasoning) */
56
163
  readonly quality: 1;
57
164
  };
165
+ /**
166
+ * GPT-4o Mini — fast, low-cost OpenAI model.
167
+ * Good for simple tasks and high-throughput scenarios.
168
+ */
58
169
  readonly "gpt-4o-mini": {
59
170
  readonly provider: "openai";
60
171
  readonly model: "gpt-4o-mini";
61
172
  readonly costPer1MInput: 0.15;
62
173
  readonly costPer1MOutput: 0.6;
63
174
  readonly maxContext: 128000;
175
+ /** Quality tier (0.55 = capable but less reliable for complex reasoning) */
64
176
  readonly quality: 0.55;
65
177
  };
178
+ /**
179
+ * GPT-4o — latest OpenAI flagship model.
180
+ * Strong reasoning, multimodal support; recommended for tool use and complex analysis.
181
+ */
66
182
  readonly "gpt-4o": {
67
183
  readonly provider: "openai";
68
184
  readonly model: "gpt-4o";
69
185
  readonly costPer1MInput: 2.5;
70
186
  readonly costPer1MOutput: 10;
71
187
  readonly maxContext: 128000;
188
+ /** Quality tier (0.8 = very good reasoning) */
72
189
  readonly quality: 0.8;
73
190
  };
191
+ /**
192
+ * Gemini 2.0 Flash — fast Google model.
193
+ * Excellent speed and cost efficiency; large 1M context window.
194
+ */
74
195
  readonly "gemini-2.0-flash": {
75
196
  readonly provider: "gemini";
76
197
  readonly model: "gemini-2.0-flash";
77
198
  readonly costPer1MInput: 0.1;
78
199
  readonly costPer1MOutput: 0.4;
79
200
  readonly maxContext: 1000000;
201
+ /** Quality tier (0.75 = good reasoning) */
80
202
  readonly quality: 0.75;
81
203
  };
204
+ /**
205
+ * Gemini 2.5 Pro Preview — advanced Google model.
206
+ * Superior reasoning to Flash; large context window and competitive pricing.
207
+ */
82
208
  readonly "gemini-2.5-pro": {
83
209
  readonly provider: "gemini";
84
210
  readonly model: "gemini-2.5-pro-preview-03-25";
85
211
  readonly costPer1MInput: 1.25;
86
212
  readonly costPer1MOutput: 10;
87
213
  readonly maxContext: 1000000;
214
+ /** Quality tier (0.95 = excellent reasoning) */
88
215
  readonly quality: 0.95;
89
216
  };
90
217
  };
218
+ /**
219
+ * Union of all model preset names.
220
+ * Use to select a pre-configured model with cost/quality/context metadata.
221
+ *
222
+ * @example
223
+ * ```typescript
224
+ * const presetName: ModelPresetName = "claude-opus";
225
+ * const preset = ModelPresets[presetName];
226
+ * ```
227
+ */
91
228
  type ModelPresetName = keyof typeof ModelPresets;
229
+ /**
230
+ * Schema for Anthropic prompt caching control.
231
+ * Currently only supports "ephemeral" type (cache for this request only).
232
+ * Non-Anthropic providers silently ignore cache_control directives.
233
+ *
234
+ * @example
235
+ * ```typescript
236
+ * const cacheControl: CacheControl = { type: "ephemeral" };
237
+ * ```
238
+ */
92
239
  declare const CacheControlSchema: Schema.Struct<{
240
+ /** Cache type: "ephemeral" for request-scoped caching */
93
241
  type: Schema.Literal<["ephemeral"]>;
94
242
  }>;
243
+ /**
244
+ * Anthropic prompt caching configuration.
245
+ * Wraps text content blocks to enable prompt caching optimization.
246
+ * Reduces costs for repeated context; only supported on Anthropic provider.
247
+ */
95
248
  type CacheControl = Schema.Schema.Type<typeof CacheControlSchema>;
249
+ /**
250
+ * Schema for image source reference.
251
+ * Supports base64-encoded or URL-referenced images in PNG, JPEG, GIF, or WebP format.
252
+ *
253
+ * @example
254
+ * ```typescript
255
+ * const source: ImageSource = {
256
+ * type: "base64",
257
+ * media_type: "image/png",
258
+ * data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
259
+ * };
260
+ * ```
261
+ */
96
262
  declare const ImageSourceSchema: Schema.Struct<{
263
+ /** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
97
264
  type: Schema.Literal<["base64", "url"]>;
265
+ /** MIME type of image: PNG, JPEG, GIF, or WebP */
98
266
  media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
267
+ /** Either base64-encoded data (when type is "base64") or an HTTP(S) URL (when type is "url") */
99
268
  data: typeof Schema.String;
100
269
  }>;
270
+ /**
271
+ * Image source reference type.
272
+ * Either a base64-encoded image or an HTTP(S) URL to an image resource.
273
+ */
101
274
  type ImageSource = Schema.Schema.Type<typeof ImageSourceSchema>;
275
+ /**
276
+ * Schema for text content blocks.
277
+ * Supports optional Anthropic prompt caching via cache_control.
278
+ *
279
+ * @example
280
+ * ```typescript
281
+ * const textBlock: TextContentBlock = {
282
+ * type: "text",
283
+ * text: "This is a text message"
284
+ * };
285
+ * ```
286
+ */
102
287
  declare const TextContentBlockSchema: Schema.Struct<{
288
+ /** Content type identifier */
103
289
  type: Schema.Literal<["text"]>;
290
+ /** Text content */
104
291
  text: typeof Schema.String;
292
+ /** Optional Anthropic cache control directive */
105
293
  cache_control: Schema.optional<Schema.Struct<{
294
+ /** Cache type: "ephemeral" for request-scoped caching */
106
295
  type: Schema.Literal<["ephemeral"]>;
107
296
  }>>;
108
297
  }>;
298
+ /**
299
+ * Schema for image content blocks.
300
+ *
301
+ * @example
302
+ * ```typescript
303
+ * const imageBlock: ImageContentBlock = {
304
+ * type: "image",
305
+ * source: { type: "url", media_type: "image/png", data: "https://..." }
306
+ * };
307
+ * ```
308
+ */
109
309
  declare const ImageContentBlockSchema: Schema.Struct<{
310
+ /** Content type identifier */
110
311
  type: Schema.Literal<["image"]>;
312
+ /** Image source reference */
111
313
  source: Schema.Struct<{
314
+ /** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
112
315
  type: Schema.Literal<["base64", "url"]>;
316
+ /** MIME type of image: PNG, JPEG, GIF, or WebP */
113
317
  media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
318
+ /** Either base64-encoded data (when type is "base64") or an HTTP(S) URL (when type is "url") */
114
319
  data: typeof Schema.String;
115
320
  }>;
116
321
  }>;
322
+ /**
323
+ * Schema for tool use content blocks (model invoking a tool).
324
+ *
325
+ * @example
326
+ * ```typescript
327
+ * const toolBlock: ToolUseContentBlock = {
328
+ * type: "tool_use",
329
+ * id: "toolu_123",
330
+ * name: "file-read",
331
+ * input: { path: "./output.txt" }
332
+ * };
333
+ * ```
334
+ */
117
335
  declare const ToolUseContentBlockSchema: Schema.Struct<{
336
+ /** Content type identifier */
118
337
  type: Schema.Literal<["tool_use"]>;
338
+ /** Unique tool call identifier */
119
339
  id: typeof Schema.String;
340
+ /** Tool name being invoked */
120
341
  name: typeof Schema.String;
342
+ /** Tool parameters (JSON-compatible object) */
121
343
  input: typeof Schema.Unknown;
122
344
  }>;
345
+ /**
346
+ * Schema for tool result content blocks (system returning tool output).
347
+ *
348
+ * @example
349
+ * ```typescript
350
+ * const resultBlock: ToolResultContentBlock = {
351
+ * type: "tool_result",
352
+ * tool_use_id: "toolu_123",
353
+ * content: "File contents..."
354
+ * };
355
+ * ```
356
+ */
123
357
  declare const ToolResultContentBlockSchema: Schema.Struct<{
358
+ /** Content type identifier */
124
359
  type: Schema.Literal<["tool_result"]>;
360
+ /** ID of tool call this result corresponds to */
125
361
  tool_use_id: typeof Schema.String;
362
+ /** Tool result/output content */
126
363
  content: typeof Schema.String;
127
364
  }>;
365
+ /**
366
+ * Union of all content block types used in LLM messages.
367
+ * Content blocks allow mixing text, images, tool invocations, and tool results.
368
+ *
369
+ * @example
370
+ * ```typescript
371
+ * const blocks: readonly ContentBlock[] = [
372
+ * { type: "text", text: "Analyze this image:" },
373
+ * { type: "image", source: { type: "url", media_type: "image/png", data: "https://..." } }
374
+ * ];
375
+ * ```
376
+ */
128
377
  type ContentBlock = {
378
+ /** Text content (optionally cached with Anthropic) */
129
379
  readonly type: "text";
130
380
  readonly text: string;
131
381
  readonly cache_control?: CacheControl;
132
382
  } | {
383
+ /** Image content */
133
384
  readonly type: "image";
134
385
  readonly source: ImageSource;
135
386
  } | {
387
+ /** Model invoking a tool */
136
388
  readonly type: "tool_use";
137
389
  readonly id: string;
138
390
  readonly name: string;
139
391
  readonly input: unknown;
140
392
  } | {
393
+ /** System returning tool output */
141
394
  readonly type: "tool_result";
142
395
  readonly tool_use_id: string;
143
396
  readonly content: string;
144
397
  };
398
+ /**
399
+ * Text content block with cache control enabled.
400
+ * Used when text context should be cached for cost reduction (Anthropic only).
401
+ * Non-Anthropic providers silently ignore the cache_control directive.
402
+ *
403
+ * @example
404
+ * ```typescript
405
+ * const cached: CacheableContentBlock = {
406
+ * type: "text",
407
+ * text: "Expensive context (system prompt, instructions, etc)",
408
+ * cache_control: { type: "ephemeral" }
409
+ * };
410
+ * ```
411
+ */
145
412
  type CacheableContentBlock = {
413
+ /** Always "text" */
146
414
  readonly type: "text";
415
+ /** Cached text content */
147
416
  readonly text: string;
417
+ /** Cache control directive (always ephemeral) */
148
418
  readonly cache_control: CacheControl;
149
419
  };
150
420
  /**
151
- * Helper wrap text in a cacheable content block.
152
- * Non-Anthropic providers silently ignore `cache_control`.
421
+ * Wrap plain text in a cacheable content block.
422
+ * Enables Anthropic prompt caching for the given text (no-op for other providers).
423
+ * Useful for repeated context like system prompts, instructions, or reference documents.
424
+ *
425
+ * @param text - The text to cache
426
+ * @returns A content block with ephemeral cache control enabled
427
+ *
428
+ * @example
429
+ * ```typescript
430
+ * const cached = makeCacheable("You are a helpful assistant...");
431
+ * // Returns: { type: "text", text: "...", cache_control: { type: "ephemeral" } }
432
+ * ```
153
433
  */
154
434
  declare const makeCacheable: (text: string) => CacheableContentBlock;
435
+ /**
436
+ * Union of LLM message roles.
437
+ * Each message has a role (system, user, assistant, tool) and content.
438
+ *
439
+ * - **system**: Instructions/context set by the agent developer. Content is always a string.
440
+ * - **user**: User query or context provided by caller. Content is string or content blocks.
441
+ * - **assistant**: Model response or thoughts. Content is string or content blocks (including tool_use).
442
+ * - **tool**: Tool execution result returned to model. Content is always string.
443
+ *
444
+ * @example
445
+ * ```typescript
446
+ * const messages: readonly LLMMessage[] = [
447
+ * { role: "system", content: "You are a helpful assistant." },
448
+ * { role: "user", content: "What is 2+2?" },
449
+ * { role: "assistant", content: "2+2 equals 4." }
450
+ * ];
451
+ *
452
+ * const withTools: readonly LLMMessage[] = [
453
+ * { role: "user", content: "Read the file." },
454
+ * {
455
+ * role: "assistant",
456
+ * content: [
457
+ * { type: "text", text: "I'll read that file for you." },
458
+ * { type: "tool_use", id: "toolu_1", name: "file-read", input: { path: "./data.txt" } }
459
+ * ]
460
+ * },
461
+ * { role: "tool", toolCallId: "toolu_1", content: "File contents here..." }
462
+ * ];
463
+ * ```
464
+ */
155
465
  type LLMMessage = {
466
+ /** System prompt/instructions — context set by developer */
156
467
  readonly role: "system";
468
+ /** Plain text string only (no content blocks) */
157
469
  readonly content: string;
158
470
  } | {
471
+ /** User input/query */
159
472
  readonly role: "user";
473
+ /** Plain text or multimodal content blocks */
160
474
  readonly content: string | readonly ContentBlock[];
161
475
  } | {
476
+ /** Model response or reasoning */
162
477
  readonly role: "assistant";
478
+ /** Plain text or multimodal content blocks (including tool_use) */
163
479
  readonly content: string | readonly ContentBlock[];
164
480
  } | {
481
+ /** Tool execution result */
165
482
  readonly role: "tool";
483
+ /** Tool call ID this result corresponds to */
166
484
  readonly toolCallId: string;
485
+ /** Plain text result/output */
167
486
  readonly content: string;
168
487
  };
488
+ /**
489
+ * Schema for token usage statistics from an LLM response.
490
+ * Used for cost tracking, budget enforcement, and observability.
491
+ *
492
+ * @example
493
+ * ```typescript
494
+ * const usage: TokenUsage = {
495
+ * inputTokens: 1200,
496
+ * outputTokens: 450,
497
+ * totalTokens: 1650,
498
+ * estimatedCost: 0.0045
499
+ * };
500
+ * ```
501
+ */
169
502
  declare const TokenUsageSchema: Schema.Struct<{
503
+ /** Tokens consumed by the input (messages + system prompt) */
170
504
  inputTokens: typeof Schema.Number;
505
+ /** Tokens generated in the response */
171
506
  outputTokens: typeof Schema.Number;
507
+ /** Sum of input and output tokens */
172
508
  totalTokens: typeof Schema.Number;
509
+ /** Estimated cost in USD based on provider pricing */
173
510
  estimatedCost: typeof Schema.Number;
174
511
  }>;
512
+ /**
513
+ * Token usage from an LLM response.
514
+ * Tracks input/output tokens separately for cost calculation.
515
+ */
175
516
  type TokenUsage = Schema.Schema.Type<typeof TokenUsageSchema>;
517
+ /**
518
+ * Schema for LLM response termination reason.
519
+ * Indicates why the model stopped generating tokens.
520
+ *
521
+ * @example
522
+ * ```typescript
523
+ * const reason: StopReason = "end_turn"; // Model concluded naturally
524
+ * const reason2: StopReason = "max_tokens"; // Hit output limit
525
+ * ```
526
+ */
176
527
  declare const StopReasonSchema: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
528
+ /**
529
+ * Reason the LLM stopped generating.
530
+ *
531
+ * - **end_turn**: Model concluded naturally — response is complete.
532
+ * - **max_tokens**: Hit configured output token limit — response may be truncated.
533
+ * - **stop_sequence**: Hit a configured stop sequence — generation halted by design.
534
+ * - **tool_use**: Model is invoking a tool — `toolCalls` array is populated.
535
+ */
177
536
  type StopReason = Schema.Schema.Type<typeof StopReasonSchema>;
537
+ /**
538
+ * Schema for tool definitions.
539
+ * Describes tools available to the LLM, including name, description, and input schema.
540
+ * Tools are passed to the LLM for function calling / tool use.
541
+ *
542
+ * @example
543
+ * ```typescript
544
+ * const tool: ToolDefinition = {
545
+ * name: "file-read",
546
+ * description: "Read a file from disk",
547
+ * inputSchema: {
548
+ * path: { type: "string", description: "File path", required: true }
549
+ * }
550
+ * };
551
+ * ```
552
+ */
178
553
  declare const ToolDefinitionSchema: Schema.Struct<{
554
+ /** Tool identifier (used by model to invoke the tool) */
179
555
  name: typeof Schema.String;
556
+ /** Human-readable tool description for the model */
180
557
  description: typeof Schema.String;
558
+ /** Input schema describing expected parameters (JSON Schema format) */
181
559
  inputSchema: Schema.Record$<typeof Schema.String, typeof Schema.Unknown>;
182
560
  }>;
561
+ /**
562
+ * Tool definition.
563
+ * Used to register available functions that the LLM can call.
564
+ * Input schema is a JSON Schema object defining parameters.
565
+ */
183
566
  type ToolDefinition = Schema.Schema.Type<typeof ToolDefinitionSchema>;
567
+ /**
568
+ * Schema for tool invocation.
569
+ * Emitted by the model when it decides to call a tool.
570
+ *
571
+ * @example
572
+ * ```typescript
573
+ * const call: ToolCall = {
574
+ * id: "toolu_123",
575
+ * name: "file-read",
576
+ * input: { path: "./output.txt" }
577
+ * };
578
+ * ```
579
+ */
184
580
  declare const ToolCallSchema: Schema.Struct<{
581
+ /** Unique tool call identifier (generated by model) */
185
582
  id: typeof Schema.String;
583
+ /** Tool name to invoke */
186
584
  name: typeof Schema.String;
585
+ /** Tool input parameters (arbitrary JSON-compatible object) */
187
586
  input: typeof Schema.Unknown;
188
587
  }>;
588
+ /**
589
+ * Tool invocation from the LLM.
590
+ * When the model decides to call a tool, this describes which tool and with what inputs.
591
+ */
189
592
  type ToolCall = Schema.Schema.Type<typeof ToolCallSchema>;
593
+ /**
594
+ * Request to the LLM for a completion.
595
+ * Includes messages, model configuration, tool definitions, and sampling parameters.
596
+ * Passed to LLMService.complete() for synchronous LLM calls.
597
+ *
598
+ * @see CompletionResponse — the response type returned by LLMService.complete()
599
+ * @see ToolDefinition — shape of entries in the `tools` array
600
+ * @see ModelConfig — shape of the `model` field
601
+ *
602
+ * @example
603
+ * ```typescript
604
+ * const request: CompletionRequest = {
605
+ * messages: [
606
+ * { role: "system", content: "You are a helpful assistant." },
607
+ * { role: "user", content: "What is the capital of France?" }
608
+ * ],
609
+ * model: { provider: "anthropic", model: "claude-opus-4-20250514" },
610
+ * maxTokens: 1024,
611
+ * temperature: 0.7,
612
+ * tools: [
613
+ * { name: "web-search", description: "Search the web", inputSchema: { query: { type: "string" } } }
614
+ * ]
615
+ * };
616
+ * ```
617
+ */
190
618
  type CompletionRequest = {
619
+ /** Conversation history (at least 1 message required) */
191
620
  readonly messages: readonly LLMMessage[];
621
+ /** Model config (provider + model name + optional sampling params) */
192
622
  readonly model?: ModelConfig;
623
+ /** Maximum response tokens (optional, uses config default if omitted) */
193
624
  readonly maxTokens?: number;
625
+ /** Sampling temperature 0.0-1.0 (optional, uses config default if omitted) */
194
626
  readonly temperature?: number;
627
+ /** Stop sequences to halt generation (optional) */
195
628
  readonly stopSequences?: readonly string[];
629
+ /** Tools available for the model to call (optional) */
196
630
  readonly tools?: readonly ToolDefinition[];
631
+ /** System prompt (optional, prepended to user messages) */
197
632
  readonly systemPrompt?: string;
198
633
  };
634
+ /**
635
+ * Schema for LLM response.
636
+ * Contains the generated content, stop reason, token usage, and any tool calls.
637
+ *
638
+ * @example
639
+ * ```typescript
640
+ * const response: CompletionResponse = {
641
+ * content: "The capital of France is Paris.",
642
+ * stopReason: "end_turn",
643
+ * usage: { inputTokens: 120, outputTokens: 15, totalTokens: 135, estimatedCost: 0.00041 },
644
+ * model: "claude-opus-4-20250514",
645
+ * toolCalls: undefined
646
+ * };
647
+ * ```
648
+ */
199
649
  declare const CompletionResponseSchema: Schema.Struct<{
650
+ /** Generated response content (text only, no content blocks) */
200
651
  content: typeof Schema.String;
652
+ /** Why the model stopped generating */
201
653
  stopReason: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
654
+ /** Token usage statistics */
202
655
  usage: Schema.Struct<{
656
+ /** Tokens consumed by the input (messages + system prompt) */
203
657
  inputTokens: typeof Schema.Number;
658
+ /** Tokens generated in the response */
204
659
  outputTokens: typeof Schema.Number;
660
+ /** Sum of input and output tokens */
205
661
  totalTokens: typeof Schema.Number;
662
+ /** Estimated cost in USD based on provider pricing */
206
663
  estimatedCost: typeof Schema.Number;
207
664
  }>;
665
+ /** Actual model identifier used (may differ from request) */
208
666
  model: typeof Schema.String;
667
+ /** Tool calls emitted by the model (if any) */
209
668
  toolCalls: Schema.optional<Schema.Array$<Schema.Struct<{
669
+ /** Unique tool call identifier (generated by model) */
210
670
  id: typeof Schema.String;
671
+ /** Tool name to invoke */
211
672
  name: typeof Schema.String;
673
+ /** Tool input parameters (arbitrary JSON-compatible object) */
212
674
  input: typeof Schema.Unknown;
213
675
  }>>>;
676
+ /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
677
+ thinking: Schema.optional<typeof Schema.String>;
214
678
  }>;
679
+ /**
680
+ * LLM response to a completion request.
681
+ * Contains generated text, stop reason, usage metrics, and optional tool calls.
682
+ *
683
+ * @see CompletionRequest — the request type passed to LLMService.complete()
684
+ * @see StopReason — possible values for the `stopReason` field
685
+ * @see TokenUsage — shape of the `usage` field
686
+ * @see ToolCall — shape of entries in the optional `toolCalls` array
687
+ */
215
688
  type CompletionResponse = Schema.Schema.Type<typeof CompletionResponseSchema>;
689
+ /**
690
+ * Events streamed during an LLM response.
691
+ * Used when streaming responses rather than waiting for full completion.
692
+ * Events arrive in sequence: text_delta(s), then tool_use_start/delta(s) if applicable, then content_complete, then usage.
693
+ *
694
+ * @example
695
+ * ```typescript
696
+ * const events: StreamEvent[] = [
697
+ * { type: "text_delta", text: "The " },
698
+ * { type: "text_delta", text: "capital " },
699
+ * { type: "text_delta", text: "is Paris." },
700
+ * { type: "content_complete", content: "The capital is Paris." },
701
+ * { type: "usage", usage: { inputTokens: 50, outputTokens: 10, totalTokens: 60, estimatedCost: 0.00018 } }
702
+ * ];
703
+ * ```
704
+ */
216
705
  type StreamEvent = {
706
+ /** Text chunk arriving */
217
707
  readonly type: "text_delta";
708
+ /** Text chunk content */
218
709
  readonly text: string;
219
710
  } | {
711
+ /** Tool invocation starting */
220
712
  readonly type: "tool_use_start";
713
+ /** Unique tool call ID */
221
714
  readonly id: string;
715
+ /** Tool name being invoked */
222
716
  readonly name: string;
223
717
  } | {
718
+ /** Tool input parameter chunk arriving */
224
719
  readonly type: "tool_use_delta";
720
+ /** JSON parameter chunk (accumulated to form full input) */
225
721
  readonly input: string;
226
722
  } | {
723
+ /** Content generation completed */
227
724
  readonly type: "content_complete";
725
+ /** Full accumulated response content */
228
726
  readonly content: string;
229
727
  } | {
728
+ /** Token usage reported */
230
729
  readonly type: "usage";
730
+ /** Final token usage for the request */
231
731
  readonly usage: TokenUsage;
232
732
  } | {
733
+ /** Error occurred during streaming */
233
734
  readonly type: "error";
735
+ /** Error message */
234
736
  readonly error: string;
235
737
  };
738
+ /**
739
+ * Completion request with structured output validation.
740
+ * Extends CompletionRequest to require the model output conform to a schema.
741
+ * Used when the agent needs guaranteed JSON schema output from the LLM.
742
+ *
743
+ * @see CompletionRequest — base request type this extends
744
+ *
745
+ * @typeParam A — The type that the LLM output must conform to
746
+ *
747
+ * @example
748
+ * ```typescript
749
+ * interface Decision {
750
+ * readonly choice: "yes" | "no";
751
+ * readonly confidence: number;
752
+ * }
753
+ *
754
+ * const request: StructuredCompletionRequest<Decision> = {
755
+ * messages: [{ role: "user", content: "Should I approve this?" }],
756
+ * outputSchema: Schema.Struct({
757
+ * choice: Schema.Literal("yes", "no"),
758
+ * confidence: Schema.Number
759
+ * }),
760
+ * maxParseRetries: 2
761
+ * };
762
+ * ```
763
+ */
236
764
  type StructuredCompletionRequest<A> = CompletionRequest & {
765
+ /** Schema that the LLM response must conform to */
237
766
  readonly outputSchema: Schema.Schema<A>;
767
+ /** If true, retry with corrected prompt if parse fails (default: false) */
238
768
  readonly retryOnParseFail?: boolean;
769
+ /** Maximum parse retry attempts before giving up (default: 1) */
239
770
  readonly maxParseRetries?: number;
240
771
  };
241
- type TruncationStrategy = "drop-oldest" | "summarize-middle" | "sliding-window" | "importance-based";
242
- type ObservabilityVerbosity = "metadata" | "full";
772
+ /**
773
+ * Strategy for truncating context when it exceeds token budget.
774
+ * Used by ContextWindowManager when compacting message history for token limits.
775
+ *
776
+ * @example
777
+ * ```typescript
778
+ * const strategy: TruncationStrategy = "summarize-middle";
779
+ * ```
780
+ */
781
+ type TruncationStrategy =
782
+ /** Remove oldest messages first (FIFO). Fastest; may lose early context. */
783
+ "drop-oldest"
784
+ /** Summarize middle messages, preserving system prompt and most recent turns. */
785
+ | "summarize-middle"
786
+ /** Keep only the most recent N messages; drops all prior history. */
787
+ | "sliding-window"
788
+ /** Use heuristics to score and drop least-important messages first. */
789
+ | "importance-based";
790
+ /**
791
+ * Observability verbosity level for LLM request events.
792
+ * Controls what is captured in each `LLMRequestEvent` published to the EventBus.
793
+ *
794
+ * @default "full"
795
+ *
796
+ * @example
797
+ * ```typescript
798
+ * const config = LLMConfig.of({
799
+ * // ... other fields
800
+ * observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
801
+ * });
802
+ * ```
803
+ */
804
+ type ObservabilityVerbosity =
805
+ /** Capture timing, token counts, and cost only — lightweight, production-safe. */
806
+ "metadata"
807
+ /** Capture complete request/response payloads — higher overhead, useful for debugging. */
808
+ | "full";
809
+ /**
810
+ * Provider-reported capabilities for structured JSON output.
811
+ * Used by the structured output pipeline to select the optimal extraction strategy.
812
+ */
813
+ type StructuredOutputCapabilities = {
814
+ /** Provider supports forcing JSON-only output (OpenAI, Gemini, Ollama) */
815
+ readonly nativeJsonMode: boolean;
816
+ /** Provider can enforce a JSON Schema on the output (OpenAI structured outputs) */
817
+ readonly jsonSchemaEnforcement: boolean;
818
+ /** Provider supports assistant message prefill to start response with "{" (Anthropic) */
819
+ readonly prefillSupport: boolean;
820
+ /** Provider supports GBNF grammar constraints for exact schema matching (Ollama/llama.cpp) */
821
+ readonly grammarConstraints: boolean;
822
+ };
243
823
 
244
824
  declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
245
825
  readonly _tag: "LLMError";
@@ -340,6 +920,11 @@ declare const LLMService_base: Context.TagClass<LLMService, "LLMService", {
340
920
  * Get current model configuration.
341
921
  */
342
922
  readonly getModelConfig: () => Effect.Effect<ModelConfig, never>;
923
+ /**
924
+ * Report structured output capabilities for this provider.
925
+ * Used by the structured output pipeline to select optimal JSON extraction strategy.
926
+ */
927
+ readonly getStructuredOutputCapabilities: () => Effect.Effect<StructuredOutputCapabilities, never>;
343
928
  }>;
344
929
  /**
345
930
  * Core LLM service — all LLM interactions go through this.
@@ -349,78 +934,315 @@ declare class LLMService extends LLMService_base {
349
934
  }
350
935
 
351
936
  declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
937
+ /**
938
+ * Default LLM provider.
939
+ * Used as fallback when a request does not specify a provider.
940
+ *
941
+ * @default "anthropic"
942
+ */
352
943
  readonly defaultProvider: LLMProvider;
944
+ /**
945
+ * Default LLM model identifier.
946
+ * Used as fallback when a request does not specify a model.
947
+ *
948
+ * @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
949
+ */
353
950
  readonly defaultModel: string;
951
+ /**
952
+ * Anthropic API key.
953
+ * Retrieved from ANTHROPIC_API_KEY environment variable.
954
+ * Required if provider is "anthropic".
955
+ *
956
+ * @default From ANTHROPIC_API_KEY env var (undefined if not set)
957
+ */
354
958
  readonly anthropicApiKey?: string;
959
+ /**
960
+ * OpenAI API key.
961
+ * Retrieved from OPENAI_API_KEY environment variable.
962
+ * Required if provider is "openai".
963
+ *
964
+ * @default From OPENAI_API_KEY env var (undefined if not set)
965
+ */
355
966
  readonly openaiApiKey?: string;
967
+ /**
968
+ * Google API key.
969
+ * Retrieved from GOOGLE_API_KEY environment variable.
970
+ * Required if provider is "gemini".
971
+ *
972
+ * @default From GOOGLE_API_KEY env var (undefined if not set)
973
+ */
356
974
  readonly googleApiKey?: string;
975
+ /**
976
+ * Ollama server endpoint.
977
+ * Retrieved from OLLAMA_ENDPOINT environment variable.
978
+ * Used for local model serving.
979
+ *
980
+ * @default "http://localhost:11434"
981
+ */
357
982
  readonly ollamaEndpoint?: string;
358
983
  /**
359
- * Embedding configuration. Anthropic has no embeddings API;
360
- * embeddings route to OpenAI (default) or Ollama.
361
- * This is the SOLE embedding config for the entire framework.
984
+ * Embedding configuration: model, provider, dimensions.
985
+ * Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
986
+ * This is the sole embedding config for the entire framework.
987
+ * Used by semantic cache, memory similarity search, and verification layers.
988
+ *
989
+ * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
362
990
  */
363
991
  readonly embeddingConfig: EmbeddingConfig;
364
992
  /**
365
993
  * Enable Anthropic prompt caching.
366
- * When true, memory context injections are wrapped in
367
- * `cache_control: { type: "ephemeral" }` blocks.
994
+ * When true, memory context injections and system prompts are wrapped in
995
+ * `cache_control: { type: "ephemeral" }` blocks to reduce costs.
996
+ * Non-Anthropic providers silently ignore cache control directives.
997
+ * Automatically set to true if defaultModel starts with "claude".
998
+ *
999
+ * @default true if defaultModel starts with "claude", false otherwise
368
1000
  */
369
1001
  readonly supportsPromptCaching: boolean;
1002
+ /**
1003
+ * Maximum number of retries for transient LLM request failures.
1004
+ * Applied with exponential backoff (2^n seconds between attempts).
1005
+ *
1006
+ * @default 3
1007
+ */
370
1008
  readonly maxRetries: number;
1009
+ /**
1010
+ * Request timeout in milliseconds.
1011
+ * LLM requests exceeding this duration are aborted.
1012
+ *
1013
+ * @default 30000 (30 seconds)
1014
+ */
371
1015
  readonly timeoutMs: number;
1016
+ /**
1017
+ * Enable/disable thinking mode for thinking-capable models.
1018
+ * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
1019
+ * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
1020
+ * - `undefined` — Auto-detect based on model capabilities (Ollama only)
1021
+ *
1022
+ * @default undefined (auto-detect)
1023
+ */
1024
+ readonly thinking?: boolean;
1025
+ /**
1026
+ * Default maximum output tokens for LLM responses.
1027
+ * Used if a CompletionRequest does not specify maxTokens.
1028
+ * Set lower for faster responses; higher for longer outputs.
1029
+ *
1030
+ * @default 4096
1031
+ */
372
1032
  readonly defaultMaxTokens: number;
1033
+ /**
1034
+ * Default sampling temperature (0.0-1.0).
1035
+ * Used if a CompletionRequest does not specify temperature.
1036
+ * 0.0 = deterministic; 1.0 = maximum randomness.
1037
+ *
1038
+ * @default 0.7 (good balance of creativity and coherence)
1039
+ */
373
1040
  readonly defaultTemperature: number;
374
1041
  /**
375
1042
  * LLM request/response observability verbosity.
376
- * "full" captures the complete request and response payloads.
377
- * "metadata" captures only timing/token counts (cheaper for production).
378
- * Default: "full" always capture everything during development.
1043
+ * Determines what data is captured in LLMRequestEvent for observability.
1044
+ *
1045
+ * - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
1046
+ * - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
1047
+ *
1048
+ * @default "full" (capture everything)
1049
+ *
1050
+ * @example
1051
+ * ```typescript
1052
+ * // Development: full details
1053
+ * observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
1054
+ * ```
379
1055
  */
380
1056
  readonly observabilityVerbosity: ObservabilityVerbosity;
381
1057
  }>;
382
1058
  /**
383
- * LLM configuration — provided via environment or config file.
1059
+ * LLM service configuration.
1060
+ * Provides API keys, default model settings, timeouts, and observability verbosity.
1061
+ * Typically constructed from environment variables via llmConfigFromEnv.
1062
+ *
1063
+ * @example
1064
+ * ```typescript
1065
+ * const config = LLMConfig.of({
1066
+ * defaultProvider: "anthropic",
1067
+ * defaultModel: "claude-opus-4-20250514",
1068
+ * anthropicApiKey: process.env.ANTHROPIC_API_KEY,
1069
+ * maxRetries: 3,
1070
+ * timeoutMs: 30000
1071
+ * });
1072
+ * ```
384
1073
  */
385
1074
  declare class LLMConfig extends LLMConfig_base {
386
1075
  }
387
1076
  /**
388
- * Raw LLMConfig value from environment variables.
389
- * Exported so callers can spread overrides (e.g. model) on top.
1077
+ * Raw LLMConfig object constructed from environment variables.
1078
+ * Reads all config from process.env with sensible defaults.
1079
+ * Exported so callers can spread overrides (e.g. change model) on top.
1080
+ *
1081
+ * Environment variables:
1082
+ * - LLM_DEFAULT_MODEL: Model identifier (default: claude-sonnet-4-20250514)
1083
+ * - ANTHROPIC_API_KEY: Anthropic API key
1084
+ * - OPENAI_API_KEY: OpenAI API key
1085
+ * - GOOGLE_API_KEY: Google API key
1086
+ * - OLLAMA_ENDPOINT: Ollama server URL (default: http://localhost:11434)
1087
+ * - EMBEDDING_MODEL: Embedding model name (default: text-embedding-3-small)
1088
+ * - EMBEDDING_DIMENSIONS: Embedding vector dimensions (default: 1536)
1089
+ * - EMBEDDING_PROVIDER: Embedding provider (default: openai)
1090
+ * - LLM_MAX_RETRIES: Retry attempts (default: 3)
1091
+ * - LLM_TIMEOUT_MS: Request timeout in ms (default: 30000)
1092
+ * - LLM_DEFAULT_TEMPERATURE: Sampling temperature (default: 0.7)
1093
+ * - LLM_OBSERVABILITY_VERBOSITY: "full" or "metadata" (default: full)
1094
+ *
1095
+ * @example
1096
+ * ```typescript
1097
+ * // Use defaults from environment
1098
+ * const config = llmConfigFromEnv;
1099
+ *
1100
+ * // Override specific fields
1101
+ * const customConfig = LLMConfig.of({
1102
+ * ...llmConfigFromEnv,
1103
+ * defaultModel: "gpt-4o",
1104
+ * defaultProvider: "openai"
1105
+ * });
1106
+ * ```
390
1107
  */
391
1108
  declare const llmConfigFromEnv: {
1109
+ /**
1110
+ * Default LLM provider.
1111
+ * Used as fallback when a request does not specify a provider.
1112
+ *
1113
+ * @default "anthropic"
1114
+ */
392
1115
  readonly defaultProvider: LLMProvider;
1116
+ /**
1117
+ * Default LLM model identifier.
1118
+ * Used as fallback when a request does not specify a model.
1119
+ *
1120
+ * @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
1121
+ */
393
1122
  readonly defaultModel: string;
1123
+ /**
1124
+ * Anthropic API key.
1125
+ * Retrieved from ANTHROPIC_API_KEY environment variable.
1126
+ * Required if provider is "anthropic".
1127
+ *
1128
+ * @default From ANTHROPIC_API_KEY env var (undefined if not set)
1129
+ */
394
1130
  readonly anthropicApiKey?: string;
1131
+ /**
1132
+ * OpenAI API key.
1133
+ * Retrieved from OPENAI_API_KEY environment variable.
1134
+ * Required if provider is "openai".
1135
+ *
1136
+ * @default From OPENAI_API_KEY env var (undefined if not set)
1137
+ */
395
1138
  readonly openaiApiKey?: string;
1139
+ /**
1140
+ * Google API key.
1141
+ * Retrieved from GOOGLE_API_KEY environment variable.
1142
+ * Required if provider is "gemini".
1143
+ *
1144
+ * @default From GOOGLE_API_KEY env var (undefined if not set)
1145
+ */
396
1146
  readonly googleApiKey?: string;
1147
+ /**
1148
+ * Ollama server endpoint.
1149
+ * Retrieved from OLLAMA_ENDPOINT environment variable.
1150
+ * Used for local model serving.
1151
+ *
1152
+ * @default "http://localhost:11434"
1153
+ */
397
1154
  readonly ollamaEndpoint?: string;
398
1155
  /**
399
- * Embedding configuration. Anthropic has no embeddings API;
400
- * embeddings route to OpenAI (default) or Ollama.
401
- * This is the SOLE embedding config for the entire framework.
1156
+ * Embedding configuration: model, provider, dimensions.
1157
+ * Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
1158
+ * This is the sole embedding config for the entire framework.
1159
+ * Used by semantic cache, memory similarity search, and verification layers.
1160
+ *
1161
+ * @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
402
1162
  */
403
1163
  readonly embeddingConfig: EmbeddingConfig;
404
1164
  /**
405
1165
  * Enable Anthropic prompt caching.
406
- * When true, memory context injections are wrapped in
407
- * `cache_control: { type: "ephemeral" }` blocks.
1166
+ * When true, memory context injections and system prompts are wrapped in
1167
+ * `cache_control: { type: "ephemeral" }` blocks to reduce costs.
1168
+ * Non-Anthropic providers silently ignore cache control directives.
1169
+ * Automatically set to true if defaultModel starts with "claude".
1170
+ *
1171
+ * @default true if defaultModel starts with "claude", false otherwise
408
1172
  */
409
1173
  readonly supportsPromptCaching: boolean;
1174
+ /**
1175
+ * Maximum number of retries for transient LLM request failures.
1176
+ * Applied with exponential backoff (2^n seconds between attempts).
1177
+ *
1178
+ * @default 3
1179
+ */
410
1180
  readonly maxRetries: number;
1181
+ /**
1182
+ * Request timeout in milliseconds.
1183
+ * LLM requests exceeding this duration are aborted.
1184
+ *
1185
+ * @default 30000 (30 seconds)
1186
+ */
411
1187
  readonly timeoutMs: number;
1188
+ /**
1189
+ * Enable/disable thinking mode for thinking-capable models.
1190
+ * - `true` — Always enable thinking (e.g., qwen3.5, DeepSeek-R1)
1191
+ * - `false` — Always disable thinking (e.g., cogito:14b that crashes with think:true)
1192
+ * - `undefined` — Auto-detect based on model capabilities (Ollama only)
1193
+ *
1194
+ * @default undefined (auto-detect)
1195
+ */
1196
+ readonly thinking?: boolean;
1197
+ /**
1198
+ * Default maximum output tokens for LLM responses.
1199
+ * Used if a CompletionRequest does not specify maxTokens.
1200
+ * Set lower for faster responses; higher for longer outputs.
1201
+ *
1202
+ * @default 4096
1203
+ */
412
1204
  readonly defaultMaxTokens: number;
1205
+ /**
1206
+ * Default sampling temperature (0.0-1.0).
1207
+ * Used if a CompletionRequest does not specify temperature.
1208
+ * 0.0 = deterministic; 1.0 = maximum randomness.
1209
+ *
1210
+ * @default 0.7 (good balance of creativity and coherence)
1211
+ */
413
1212
  readonly defaultTemperature: number;
414
1213
  /**
415
1214
  * LLM request/response observability verbosity.
416
- * "full" captures the complete request and response payloads.
417
- * "metadata" captures only timing/token counts (cheaper for production).
418
- * Default: "full" always capture everything during development.
1215
+ * Determines what data is captured in LLMRequestEvent for observability.
1216
+ *
1217
+ * - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
1218
+ * - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
1219
+ *
1220
+ * @default "full" (capture everything)
1221
+ *
1222
+ * @example
1223
+ * ```typescript
1224
+ * // Development: full details
1225
+ * observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
1226
+ * ```
419
1227
  */
420
1228
  readonly observabilityVerbosity: ObservabilityVerbosity;
421
1229
  };
422
1230
  /**
423
- * Build LLMConfig from environment variables.
1231
+ * Effect-TS Layer that provides LLMConfig from environment variables.
1232
+ * Use this layer to automatically populate LLMConfig from process.env.
1233
+ * Can be overridden with a custom layer for testing or custom configuration.
1234
+ *
1235
+ * @example
1236
+ * ```typescript
1237
+ * const effect = Effect.gen(function* () {
1238
+ * const config = yield* LLMConfig;
1239
+ * console.log(config.defaultModel);
1240
+ * }).pipe(Effect.provide(LLMConfigFromEnv));
1241
+ *
1242
+ * Effect.runPromise(effect);
1243
+ * ```
1244
+ *
1245
+ * @see llmConfigFromEnv
424
1246
  */
425
1247
  declare const LLMConfigFromEnv: Layer.Layer<LLMConfig, never, never>;
426
1248
 
@@ -462,6 +1284,8 @@ declare const LocalProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
462
1284
 
463
1285
  declare const GeminiProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
464
1286
 
1287
+ declare const LiteLLMProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
1288
+
465
1289
  /**
466
1290
  * Create a deterministic test LLM service.
467
1291
  * Returns responses based on pattern matching against prompt content.
@@ -576,14 +1400,30 @@ declare const ComplexityAnalysisSchema: Schema.Struct<{
576
1400
  }>;
577
1401
  type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
578
1402
 
1403
+ /**
1404
+ * Default model constants for each LLM provider.
1405
+ * Single source of truth — used by providers at construction time
1406
+ * and by the runtime to resolve model names for display/metrics.
1407
+ */
1408
+ declare const PROVIDER_DEFAULT_MODELS: Record<string, string>;
1409
+ /**
1410
+ * Get the default model for a given provider.
1411
+ * Returns undefined if the provider is not recognized.
1412
+ */
1413
+ declare function getProviderDefaultModel(provider: string): string | undefined;
1414
+
579
1415
  /**
580
1416
  * Create the LLM provider layer for a specific provider.
581
1417
  * Uses env vars for configuration by default.
582
1418
  */
583
- declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
1419
+ declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string, modelParams?: {
1420
+ thinking?: boolean;
1421
+ temperature?: number;
1422
+ maxTokens?: number;
1423
+ }) => Layer.Layer<LLMService | PromptManager, never, never>;
584
1424
  /**
585
1425
  * LLM layer with custom config (for programmatic use).
586
1426
  */
587
- declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini") => Layer.Layer<LLMService | PromptManager, never, never>;
1427
+ declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
588
1428
 
589
- export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
1429
+ export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, PROVIDER_DEFAULT_MODELS, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, type StructuredOutputCapabilities, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, getProviderDefaultModel, llmConfigFromEnv, makeCacheable, retryPolicy };