@reactive-agents/llm-provider 0.4.0 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +822 -22
- package/dist/index.js +534 -75
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -3,242 +3,807 @@ import * as effect_Cause from 'effect/Cause';
|
|
|
3
3
|
import * as effect_Types from 'effect/Types';
|
|
4
4
|
import * as effect_Duration from 'effect/Duration';
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
/**
|
|
7
|
+
* Schema for LLM provider selection.
|
|
8
|
+
* Supported providers: anthropic, openai, ollama, gemini, litellm, custom.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* const provider: LLMProvider = "anthropic";
|
|
13
|
+
* ```
|
|
14
|
+
*/
|
|
15
|
+
declare const LLMProviderType: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
|
|
16
|
+
/**
|
|
17
|
+
* Union of supported LLM provider names.
|
|
18
|
+
* - "anthropic": Claude models via Anthropic API
|
|
19
|
+
* - "openai": GPT models via OpenAI API
|
|
20
|
+
* - "ollama": Local models via Ollama
|
|
21
|
+
* - "gemini": Google Gemini models
|
|
22
|
+
* - "litellm": LiteLLM proxy (40+ model providers)
|
|
23
|
+
* - "custom": User-defined provider adapter
|
|
24
|
+
*/
|
|
7
25
|
type LLMProvider = Schema.Schema.Type<typeof LLMProviderType>;
|
|
26
|
+
/**
|
|
27
|
+
* Schema for embedding model configuration.
|
|
28
|
+
* Embeddings are used for semantic caching, memory similarity search, and verification.
|
|
29
|
+
* Anthropic provides no embeddings API; embeddings always route to OpenAI or Ollama.
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* const config: EmbeddingConfig = {
|
|
34
|
+
* model: "text-embedding-3-small",
|
|
35
|
+
* dimensions: 1536,
|
|
36
|
+
* provider: "openai",
|
|
37
|
+
* batchSize: 100
|
|
38
|
+
* };
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
8
41
|
declare const EmbeddingConfigSchema: Schema.Struct<{
|
|
42
|
+
/** Embedding model name (e.g., "text-embedding-3-small") */
|
|
9
43
|
model: typeof Schema.String;
|
|
44
|
+
/** Output embedding vector dimensionality */
|
|
10
45
|
dimensions: typeof Schema.Number;
|
|
46
|
+
/** Provider hosting the embedding model */
|
|
11
47
|
provider: Schema.Literal<["openai", "ollama"]>;
|
|
48
|
+
/** Maximum vectors to embed in a single API call (default: 100) */
|
|
12
49
|
batchSize: Schema.optional<typeof Schema.Number>;
|
|
13
50
|
}>;
|
|
51
|
+
/**
|
|
52
|
+
* Embedding configuration type.
|
|
53
|
+
* Specifies the embedding model and provider for semantic operations.
|
|
54
|
+
*/
|
|
14
55
|
type EmbeddingConfig = Schema.Schema.Type<typeof EmbeddingConfigSchema>;
|
|
56
|
+
/**
|
|
57
|
+
* Default embedding configuration.
|
|
58
|
+
* Uses OpenAI's text-embedding-3-small with 1536 dimensions.
|
|
59
|
+
*
|
|
60
|
+
* @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
|
|
61
|
+
*/
|
|
15
62
|
declare const DefaultEmbeddingConfig: EmbeddingConfig;
|
|
63
|
+
/**
|
|
64
|
+
* Schema for LLM model configuration options.
|
|
65
|
+
* Includes provider, model name, and optional sampling/output parameters.
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```typescript
|
|
69
|
+
* const config: ModelConfig = {
|
|
70
|
+
* provider: "anthropic",
|
|
71
|
+
* model: "claude-opus-4-20250514",
|
|
72
|
+
* maxTokens: 4096,
|
|
73
|
+
* temperature: 0.7
|
|
74
|
+
* };
|
|
75
|
+
* ```
|
|
76
|
+
*/
|
|
16
77
|
declare const ModelConfigSchema: Schema.Struct<{
|
|
17
|
-
|
|
78
|
+
/** LLM provider identifier */
|
|
79
|
+
provider: Schema.Literal<["anthropic", "openai", "ollama", "gemini", "litellm", "custom"]>;
|
|
80
|
+
/** Model name/identifier for the provider */
|
|
18
81
|
model: typeof Schema.String;
|
|
82
|
+
/** Maximum tokens in response (optional) */
|
|
19
83
|
maxTokens: Schema.optional<typeof Schema.Number>;
|
|
84
|
+
/** Sampling temperature, typically 0.0-1.0; the accepted range is provider-specific (optional) */
|
|
20
85
|
temperature: Schema.optional<typeof Schema.Number>;
|
|
86
|
+
/** Top-p (nucleus) sampling probability (optional) */
|
|
21
87
|
topP: Schema.optional<typeof Schema.Number>;
|
|
88
|
+
/** Stop sequences to halt generation (optional) */
|
|
22
89
|
stopSequences: Schema.optional<Schema.Array$<typeof Schema.String>>;
|
|
23
90
|
}>;
|
|
91
|
+
/**
|
|
92
|
+
* LLM model configuration type.
|
|
93
|
+
* Specifies which LLM to use and how to configure its behavior.
|
|
94
|
+
*/
|
|
24
95
|
type ModelConfig = Schema.Schema.Type<typeof ModelConfigSchema>;
|
|
96
|
+
/**
|
|
97
|
+
* Pre-configured model profiles for popular LLMs.
|
|
98
|
+
* Each preset includes cost estimates, context window, and quality tiers.
|
|
99
|
+
* Quality tier: 0.0 (low) to 1.0 (highest).
|
|
100
|
+
* Cost: per 1 million input/output tokens in USD.
|
|
101
|
+
*
|
|
102
|
+
* @example
|
|
103
|
+
* ```typescript
|
|
104
|
+
* const preset = ModelPresets["claude-opus"];
|
|
105
|
+
* // { provider: "anthropic", model: "claude-opus-4-20250514", costPer1MInput: 15.0, ... }
|
|
106
|
+
* ```
|
|
107
|
+
*/
|
|
25
108
|
declare const ModelPresets: {
|
|
109
|
+
/**
|
|
110
|
+
* Claude 3.5 Haiku — fast, cost-effective Anthropic model.
|
|
111
|
+
* Best for low-latency, simple reasoning tasks; not recommended for complex analysis.
|
|
112
|
+
*/
|
|
26
113
|
readonly "claude-haiku": {
|
|
27
114
|
readonly provider: "anthropic";
|
|
28
115
|
readonly model: "claude-3-5-haiku-20241022";
|
|
116
|
+
/** Cost per 1 million input tokens in USD */
|
|
29
117
|
readonly costPer1MInput: 1;
|
|
118
|
+
/** Cost per 1 million output tokens in USD */
|
|
30
119
|
readonly costPer1MOutput: 5;
|
|
120
|
+
/** Maximum context window in tokens */
|
|
31
121
|
readonly maxContext: 200000;
|
|
122
|
+
/** Quality tier (0.6 = reliable for simple tasks) */
|
|
32
123
|
readonly quality: 0.6;
|
|
33
124
|
};
|
|
125
|
+
/**
|
|
126
|
+
* Claude Sonnet 4 — balanced Anthropic model.
|
|
127
|
+
* Recommended for general-purpose reasoning, tool use, and production agents.
|
|
128
|
+
*/
|
|
34
129
|
readonly "claude-sonnet": {
|
|
35
130
|
readonly provider: "anthropic";
|
|
36
131
|
readonly model: "claude-sonnet-4-20250514";
|
|
37
132
|
readonly costPer1MInput: 3;
|
|
38
133
|
readonly costPer1MOutput: 15;
|
|
39
134
|
readonly maxContext: 200000;
|
|
135
|
+
/** Quality tier (0.85 = excellent reasoning) */
|
|
40
136
|
readonly quality: 0.85;
|
|
41
137
|
};
|
|
138
|
+
/**
|
|
139
|
+
* Claude Sonnet 4.5 — latest Anthropic model.
|
|
140
|
+
* Superior reasoning over Sonnet 4; recommended for complex multi-step reasoning.
|
|
141
|
+
*/
|
|
42
142
|
readonly "claude-sonnet-4-5": {
|
|
43
143
|
readonly provider: "anthropic";
|
|
44
144
|
readonly model: "claude-sonnet-4-5-20250929";
|
|
45
145
|
readonly costPer1MInput: 3;
|
|
46
146
|
readonly costPer1MOutput: 15;
|
|
47
147
|
readonly maxContext: 200000;
|
|
148
|
+
/** Quality tier (0.9 = very strong reasoning) */
|
|
48
149
|
readonly quality: 0.9;
|
|
49
150
|
};
|
|
151
|
+
/**
|
|
152
|
+
* Claude Opus 4 — most capable Anthropic model.
|
|
153
|
+
* Best for complex analysis, research, and high-accuracy multi-hop reasoning.
|
|
154
|
+
* 1M-token context window (the largest among these presets, shared with the Gemini entries); highest cost.
|
|
155
|
+
*/
|
|
50
156
|
readonly "claude-opus": {
|
|
51
157
|
readonly provider: "anthropic";
|
|
52
158
|
readonly model: "claude-opus-4-20250514";
|
|
53
159
|
readonly costPer1MInput: 15;
|
|
54
160
|
readonly costPer1MOutput: 75;
|
|
55
161
|
readonly maxContext: 1000000;
|
|
162
|
+
/** Quality tier (1.0 = frontier-class reasoning) */
|
|
56
163
|
readonly quality: 1;
|
|
57
164
|
};
|
|
165
|
+
/**
|
|
166
|
+
* GPT-4o Mini — fast, low-cost OpenAI model.
|
|
167
|
+
* Good for simple tasks and high-throughput scenarios.
|
|
168
|
+
*/
|
|
58
169
|
readonly "gpt-4o-mini": {
|
|
59
170
|
readonly provider: "openai";
|
|
60
171
|
readonly model: "gpt-4o-mini";
|
|
61
172
|
readonly costPer1MInput: 0.15;
|
|
62
173
|
readonly costPer1MOutput: 0.6;
|
|
63
174
|
readonly maxContext: 128000;
|
|
175
|
+
/** Quality tier (0.55 = capable but less reliable for complex reasoning) */
|
|
64
176
|
readonly quality: 0.55;
|
|
65
177
|
};
|
|
178
|
+
/**
|
|
179
|
+
* GPT-4o — latest OpenAI flagship model.
|
|
180
|
+
* Strong reasoning, multimodal support; recommended for tool use and complex analysis.
|
|
181
|
+
*/
|
|
66
182
|
readonly "gpt-4o": {
|
|
67
183
|
readonly provider: "openai";
|
|
68
184
|
readonly model: "gpt-4o";
|
|
69
185
|
readonly costPer1MInput: 2.5;
|
|
70
186
|
readonly costPer1MOutput: 10;
|
|
71
187
|
readonly maxContext: 128000;
|
|
188
|
+
/** Quality tier (0.8 = very good reasoning) */
|
|
72
189
|
readonly quality: 0.8;
|
|
73
190
|
};
|
|
191
|
+
/**
|
|
192
|
+
* Gemini 2.0 Flash — fast Google model.
|
|
193
|
+
* Excellent speed and cost efficiency; large 1M context window.
|
|
194
|
+
*/
|
|
74
195
|
readonly "gemini-2.0-flash": {
|
|
75
196
|
readonly provider: "gemini";
|
|
76
197
|
readonly model: "gemini-2.0-flash";
|
|
77
198
|
readonly costPer1MInput: 0.1;
|
|
78
199
|
readonly costPer1MOutput: 0.4;
|
|
79
200
|
readonly maxContext: 1000000;
|
|
201
|
+
/** Quality tier (0.75 = good reasoning) */
|
|
80
202
|
readonly quality: 0.75;
|
|
81
203
|
};
|
|
204
|
+
/**
|
|
205
|
+
* Gemini 2.5 Pro Preview — advanced Google model.
|
|
206
|
+
* Superior reasoning to Flash; large context window and competitive pricing.
|
|
207
|
+
*/
|
|
82
208
|
readonly "gemini-2.5-pro": {
|
|
83
209
|
readonly provider: "gemini";
|
|
84
210
|
readonly model: "gemini-2.5-pro-preview-03-25";
|
|
85
211
|
readonly costPer1MInput: 1.25;
|
|
86
212
|
readonly costPer1MOutput: 10;
|
|
87
213
|
readonly maxContext: 1000000;
|
|
214
|
+
/** Quality tier (0.95 = excellent reasoning) */
|
|
88
215
|
readonly quality: 0.95;
|
|
89
216
|
};
|
|
90
217
|
};
|
|
218
|
+
/**
|
|
219
|
+
* Union of all model preset names.
|
|
220
|
+
* Use to select a pre-configured model with cost/quality/context metadata.
|
|
221
|
+
*
|
|
222
|
+
* @example
|
|
223
|
+
* ```typescript
|
|
224
|
+
* const presetName: ModelPresetName = "claude-opus";
|
|
225
|
+
* const preset = ModelPresets[presetName];
|
|
226
|
+
* ```
|
|
227
|
+
*/
|
|
91
228
|
type ModelPresetName = keyof typeof ModelPresets;
|
|
229
|
+
/**
|
|
230
|
+
* Schema for Anthropic prompt caching control.
|
|
231
|
+
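* Currently only supports the "ephemeral" type: a short-lived cache that later requests can reuse (about 5 minutes by default per Anthropic's documentation), not a per-request cache.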
|
|
232
|
+
* Non-Anthropic providers silently ignore cache_control directives.
|
|
233
|
+
*
|
|
234
|
+
* @example
|
|
235
|
+
* ```typescript
|
|
236
|
+
* const cacheControl: CacheControl = { type: "ephemeral" };
|
|
237
|
+
* ```
|
|
238
|
+
*/
|
|
92
239
|
declare const CacheControlSchema: Schema.Struct<{
|
|
240
|
+
/** Cache type: "ephemeral" for short-lived prompt caching (reusable across requests until it expires) */
|
|
93
241
|
type: Schema.Literal<["ephemeral"]>;
|
|
94
242
|
}>;
|
|
243
|
+
/**
|
|
244
|
+
* Anthropic prompt caching configuration.
|
|
245
|
+
* Wraps text content blocks to enable prompt caching optimization.
|
|
246
|
+
* Reduces costs for repeated context; only supported on Anthropic provider.
|
|
247
|
+
*/
|
|
95
248
|
type CacheControl = Schema.Schema.Type<typeof CacheControlSchema>;
|
|
249
|
+
/**
|
|
250
|
+
* Schema for image source reference.
|
|
251
|
+
* Supports base64-encoded or URL-referenced images in PNG, JPEG, GIF, or WebP format.
|
|
252
|
+
*
|
|
253
|
+
* @example
|
|
254
|
+
* ```typescript
|
|
255
|
+
* const source: ImageSource = {
|
|
256
|
+
* type: "base64",
|
|
257
|
+
* media_type: "image/png",
|
|
258
|
+
* data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
|
|
259
|
+
* };
|
|
260
|
+
* ```
|
|
261
|
+
*/
|
|
96
262
|
declare const ImageSourceSchema: Schema.Struct<{
|
|
263
|
+
/** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
|
|
97
264
|
type: Schema.Literal<["base64", "url"]>;
|
|
265
|
+
/** MIME type of image: PNG, JPEG, GIF, or WebP */
|
|
98
266
|
media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
|
|
267
|
+
/** Base64-encoded image data when type is "base64", or the image URL when type is "url" */
|
|
99
268
|
data: typeof Schema.String;
|
|
100
269
|
}>;
|
|
270
|
+
/**
|
|
271
|
+
* Image source reference type.
|
|
272
|
+
* Either a base64-encoded image or an HTTPS URL to an image resource.
|
|
273
|
+
*/
|
|
101
274
|
type ImageSource = Schema.Schema.Type<typeof ImageSourceSchema>;
|
|
275
|
+
/**
|
|
276
|
+
* Schema for text content blocks.
|
|
277
|
+
* Supports optional Anthropic prompt caching via cache_control.
|
|
278
|
+
*
|
|
279
|
+
* @example
|
|
280
|
+
* ```typescript
|
|
281
|
+
* const textBlock: TextContentBlock = {
|
|
282
|
+
* type: "text",
|
|
283
|
+
* text: "This is a text message"
|
|
284
|
+
* };
|
|
285
|
+
* ```
|
|
286
|
+
*/
|
|
102
287
|
declare const TextContentBlockSchema: Schema.Struct<{
|
|
288
|
+
/** Content type identifier */
|
|
103
289
|
type: Schema.Literal<["text"]>;
|
|
290
|
+
/** Text content */
|
|
104
291
|
text: typeof Schema.String;
|
|
292
|
+
/** Optional Anthropic cache control directive */
|
|
105
293
|
cache_control: Schema.optional<Schema.Struct<{
|
|
294
|
+
/** Cache type: "ephemeral" for short-lived prompt caching (reusable across requests until it expires) */
|
|
106
295
|
type: Schema.Literal<["ephemeral"]>;
|
|
107
296
|
}>>;
|
|
108
297
|
}>;
|
|
298
|
+
/**
|
|
299
|
+
* Schema for image content blocks.
|
|
300
|
+
*
|
|
301
|
+
* @example
|
|
302
|
+
* ```typescript
|
|
303
|
+
* const imageBlock: ImageContentBlock = {
|
|
304
|
+
* type: "image",
|
|
305
|
+
* source: { type: "url", media_type: "image/png", data: "https://..." }
|
|
306
|
+
* };
|
|
307
|
+
* ```
|
|
308
|
+
*/
|
|
109
309
|
declare const ImageContentBlockSchema: Schema.Struct<{
|
|
310
|
+
/** Content type identifier */
|
|
110
311
|
type: Schema.Literal<["image"]>;
|
|
312
|
+
/** Image source reference */
|
|
111
313
|
source: Schema.Struct<{
|
|
314
|
+
/** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
|
|
112
315
|
type: Schema.Literal<["base64", "url"]>;
|
|
316
|
+
/** MIME type of image: PNG, JPEG, GIF, or WebP */
|
|
113
317
|
media_type: Schema.Literal<["image/png", "image/jpeg", "image/gif", "image/webp"]>;
|
|
318
|
+
/** Base64-encoded image data when type is "base64", or the image URL when type is "url" */
|
|
114
319
|
data: typeof Schema.String;
|
|
115
320
|
}>;
|
|
116
321
|
}>;
|
|
322
|
+
/**
|
|
323
|
+
* Schema for tool use content blocks (model invoking a tool).
|
|
324
|
+
*
|
|
325
|
+
* @example
|
|
326
|
+
* ```typescript
|
|
327
|
+
* const toolBlock: ToolUseContentBlock = {
|
|
328
|
+
* type: "tool_use",
|
|
329
|
+
* id: "toolu_123",
|
|
330
|
+
* name: "file-read",
|
|
331
|
+
* input: { path: "./output.txt" }
|
|
332
|
+
* };
|
|
333
|
+
* ```
|
|
334
|
+
*/
|
|
117
335
|
declare const ToolUseContentBlockSchema: Schema.Struct<{
|
|
336
|
+
/** Content type identifier */
|
|
118
337
|
type: Schema.Literal<["tool_use"]>;
|
|
338
|
+
/** Unique tool call identifier */
|
|
119
339
|
id: typeof Schema.String;
|
|
340
|
+
/** Tool name being invoked */
|
|
120
341
|
name: typeof Schema.String;
|
|
342
|
+
/** Tool parameters (JSON-compatible object) */
|
|
121
343
|
input: typeof Schema.Unknown;
|
|
122
344
|
}>;
|
|
345
|
+
/**
|
|
346
|
+
* Schema for tool result content blocks (system returning tool output).
|
|
347
|
+
*
|
|
348
|
+
* @example
|
|
349
|
+
* ```typescript
|
|
350
|
+
* const resultBlock: ToolResultContentBlock = {
|
|
351
|
+
* type: "tool_result",
|
|
352
|
+
* tool_use_id: "toolu_123",
|
|
353
|
+
* content: "File contents..."
|
|
354
|
+
* };
|
|
355
|
+
* ```
|
|
356
|
+
*/
|
|
123
357
|
declare const ToolResultContentBlockSchema: Schema.Struct<{
|
|
358
|
+
/** Content type identifier */
|
|
124
359
|
type: Schema.Literal<["tool_result"]>;
|
|
360
|
+
/** ID of tool call this result corresponds to */
|
|
125
361
|
tool_use_id: typeof Schema.String;
|
|
362
|
+
/** Tool result/output content */
|
|
126
363
|
content: typeof Schema.String;
|
|
127
364
|
}>;
|
|
365
|
+
/**
|
|
366
|
+
* Union of all content block types used in LLM messages.
|
|
367
|
+
* Content blocks allow mixing text, images, tool invocations, and tool results.
|
|
368
|
+
*
|
|
369
|
+
* @example
|
|
370
|
+
* ```typescript
|
|
371
|
+
* const blocks: readonly ContentBlock[] = [
|
|
372
|
+
* { type: "text", text: "Analyze this image:" },
|
|
373
|
+
* { type: "image", source: { type: "url", media_type: "image/png", data: "https://..." } }
|
|
374
|
+
* ];
|
|
375
|
+
* ```
|
|
376
|
+
*/
|
|
128
377
|
type ContentBlock = {
|
|
378
|
+
/** Text content (optionally cached with Anthropic) */
|
|
129
379
|
readonly type: "text";
|
|
130
380
|
readonly text: string;
|
|
131
381
|
readonly cache_control?: CacheControl;
|
|
132
382
|
} | {
|
|
383
|
+
/** Image content */
|
|
133
384
|
readonly type: "image";
|
|
134
385
|
readonly source: ImageSource;
|
|
135
386
|
} | {
|
|
387
|
+
/** Model invoking a tool */
|
|
136
388
|
readonly type: "tool_use";
|
|
137
389
|
readonly id: string;
|
|
138
390
|
readonly name: string;
|
|
139
391
|
readonly input: unknown;
|
|
140
392
|
} | {
|
|
393
|
+
/** System returning tool output */
|
|
141
394
|
readonly type: "tool_result";
|
|
142
395
|
readonly tool_use_id: string;
|
|
143
396
|
readonly content: string;
|
|
144
397
|
};
|
|
398
|
+
/**
|
|
399
|
+
* Text content block with cache control enabled.
|
|
400
|
+
* Used when text context should be cached for cost reduction (Anthropic only).
|
|
401
|
+
* Non-Anthropic providers silently ignore the cache_control directive.
|
|
402
|
+
*
|
|
403
|
+
* @example
|
|
404
|
+
* ```typescript
|
|
405
|
+
* const cached: CacheableContentBlock = {
|
|
406
|
+
* type: "text",
|
|
407
|
+
* text: "Expensive context (system prompt, instructions, etc)",
|
|
408
|
+
* cache_control: { type: "ephemeral" }
|
|
409
|
+
* };
|
|
410
|
+
* ```
|
|
411
|
+
*/
|
|
145
412
|
type CacheableContentBlock = {
|
|
413
|
+
/** Always "text" */
|
|
146
414
|
readonly type: "text";
|
|
415
|
+
/** Cached text content */
|
|
147
416
|
readonly text: string;
|
|
417
|
+
/** Cache control directive (always ephemeral) */
|
|
148
418
|
readonly cache_control: CacheControl;
|
|
149
419
|
};
|
|
150
420
|
/**
|
|
151
|
-
*
|
|
152
|
-
*
|
|
421
|
+
* Wrap plain text in a cacheable content block.
|
|
422
|
+
* Enables Anthropic prompt caching for the given text (no-op for other providers).
|
|
423
|
+
* Useful for repeated context like system prompts, instructions, or reference documents.
|
|
424
|
+
*
|
|
425
|
+
* @param text — The text to cache
|
|
426
|
+
* @returns A content block with ephemeral cache control enabled
|
|
427
|
+
*
|
|
428
|
+
* @example
|
|
429
|
+
* ```typescript
|
|
430
|
+
* const cached = makeCacheable("You are a helpful assistant...");
|
|
431
|
+
* // Returns: { type: "text", text: "...", cache_control: { type: "ephemeral" } }
|
|
432
|
+
* ```
|
|
153
433
|
*/
|
|
154
434
|
declare const makeCacheable: (text: string) => CacheableContentBlock;
|
|
435
|
+
/**
|
|
436
|
+
* Union of LLM message shapes, discriminated by role.
|
|
437
|
+
* Each message has a role (system, user, assistant, tool) and content.
|
|
438
|
+
*
|
|
439
|
+
* - **system**: Instructions/context set by the agent developer. Content is always a string.
|
|
440
|
+
* - **user**: User query or context provided by caller. Content is string or content blocks.
|
|
441
|
+
* - **assistant**: Model response or thoughts. Content is string or content blocks (including tool_use).
|
|
442
|
+
* - **tool**: Tool execution result returned to model. Content is always string.
|
|
443
|
+
*
|
|
444
|
+
* @example
|
|
445
|
+
* ```typescript
|
|
446
|
+
* const messages: readonly LLMMessage[] = [
|
|
447
|
+
* { role: "system", content: "You are a helpful assistant." },
|
|
448
|
+
* { role: "user", content: "What is 2+2?" },
|
|
449
|
+
* { role: "assistant", content: "2+2 equals 4." }
|
|
450
|
+
* ];
|
|
451
|
+
*
|
|
452
|
+
* const withTools: readonly LLMMessage[] = [
|
|
453
|
+
* { role: "user", content: "Read the file." },
|
|
454
|
+
* {
|
|
455
|
+
* role: "assistant",
|
|
456
|
+
* content: [
|
|
457
|
+
* { type: "text", text: "I'll read that file for you." },
|
|
458
|
+
* { type: "tool_use", id: "toolu_1", name: "file-read", input: { path: "./data.txt" } }
|
|
459
|
+
* ]
|
|
460
|
+
* },
|
|
461
|
+
* { role: "tool", toolCallId: "toolu_1", content: "File contents here..." }
|
|
462
|
+
* ];
|
|
463
|
+
* ```
|
|
464
|
+
*/
|
|
155
465
|
type LLMMessage = {
|
|
466
|
+
/** System prompt/instructions — context set by developer */
|
|
156
467
|
readonly role: "system";
|
|
468
|
+
/** Plain text string only (no content blocks) */
|
|
157
469
|
readonly content: string;
|
|
158
470
|
} | {
|
|
471
|
+
/** User input/query */
|
|
159
472
|
readonly role: "user";
|
|
473
|
+
/** Plain text or multimodal content blocks */
|
|
160
474
|
readonly content: string | readonly ContentBlock[];
|
|
161
475
|
} | {
|
|
476
|
+
/** Model response or reasoning */
|
|
162
477
|
readonly role: "assistant";
|
|
478
|
+
/** Plain text or multimodal content blocks (including tool_use) */
|
|
163
479
|
readonly content: string | readonly ContentBlock[];
|
|
164
480
|
} | {
|
|
481
|
+
/** Tool execution result */
|
|
165
482
|
readonly role: "tool";
|
|
483
|
+
/** Tool call ID this result corresponds to */
|
|
166
484
|
readonly toolCallId: string;
|
|
485
|
+
/** Plain text result/output */
|
|
167
486
|
readonly content: string;
|
|
168
487
|
};
|
|
488
|
+
/**
|
|
489
|
+
* Schema for token usage statistics from an LLM response.
|
|
490
|
+
* Used for cost tracking, budget enforcement, and observability.
|
|
491
|
+
*
|
|
492
|
+
* @example
|
|
493
|
+
* ```typescript
|
|
494
|
+
* const usage: TokenUsage = {
|
|
495
|
+
* inputTokens: 1200,
|
|
496
|
+
* outputTokens: 450,
|
|
497
|
+
* totalTokens: 1650,
|
|
498
|
+
* estimatedCost: 0.0045
|
|
499
|
+
* };
|
|
500
|
+
* ```
|
|
501
|
+
*/
|
|
169
502
|
declare const TokenUsageSchema: Schema.Struct<{
|
|
503
|
+
/** Tokens consumed by the input (messages + system prompt) */
|
|
170
504
|
inputTokens: typeof Schema.Number;
|
|
505
|
+
/** Tokens generated in the response */
|
|
171
506
|
outputTokens: typeof Schema.Number;
|
|
507
|
+
/** Sum of input and output tokens */
|
|
172
508
|
totalTokens: typeof Schema.Number;
|
|
509
|
+
/** Estimated cost in USD based on provider pricing */
|
|
173
510
|
estimatedCost: typeof Schema.Number;
|
|
174
511
|
}>;
|
|
512
|
+
/**
|
|
513
|
+
* Token usage from an LLM response.
|
|
514
|
+
* Tracks input/output tokens separately for cost calculation.
|
|
515
|
+
*/
|
|
175
516
|
type TokenUsage = Schema.Schema.Type<typeof TokenUsageSchema>;
|
|
517
|
+
/**
|
|
518
|
+
* Schema for LLM response termination reason.
|
|
519
|
+
* Indicates why the model stopped generating tokens.
|
|
520
|
+
*
|
|
521
|
+
* @example
|
|
522
|
+
* ```typescript
|
|
523
|
+
* const reason: StopReason = "end_turn"; // Model concluded naturally
|
|
524
|
+
* const reason2: StopReason = "max_tokens"; // Hit output limit
|
|
525
|
+
* ```
|
|
526
|
+
*/
|
|
176
527
|
declare const StopReasonSchema: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
|
|
528
|
+
/**
|
|
529
|
+
* Reason the LLM stopped generating.
|
|
530
|
+
*
|
|
531
|
+
* - **end_turn**: Model concluded naturally — response is complete.
|
|
532
|
+
* - **max_tokens**: Hit configured output token limit — response may be truncated.
|
|
533
|
+
* - **stop_sequence**: Hit a configured stop sequence — generation halted by design.
|
|
534
|
+
* - **tool_use**: Model is invoking a tool — `toolCalls` array is populated.
|
|
535
|
+
*/
|
|
177
536
|
type StopReason = Schema.Schema.Type<typeof StopReasonSchema>;
|
|
537
|
+
/**
|
|
538
|
+
* Schema for tool definitions.
|
|
539
|
+
* Describes tools available to the LLM, including name, description, and input schema.
|
|
540
|
+
* Tools are passed to the LLM for function calling / tool use.
|
|
541
|
+
*
|
|
542
|
+
* @example
|
|
543
|
+
* ```typescript
|
|
544
|
+
* const tool: ToolDefinition = {
|
|
545
|
+
* name: "file-read",
|
|
546
|
+
* description: "Read a file from disk",
|
|
547
|
+
* inputSchema: {
|
|
548
|
+
* path: { type: "string", description: "File path", required: true }
|
|
549
|
+
* }
|
|
550
|
+
* };
|
|
551
|
+
* ```
|
|
552
|
+
*/
|
|
178
553
|
declare const ToolDefinitionSchema: Schema.Struct<{
|
|
554
|
+
/** Tool identifier (used by model to invoke the tool) */
|
|
179
555
|
name: typeof Schema.String;
|
|
556
|
+
/** Human-readable tool description for the model */
|
|
180
557
|
description: typeof Schema.String;
|
|
558
|
+
/** Input schema describing expected parameters (JSON Schema format) */
|
|
181
559
|
inputSchema: Schema.Record$<typeof Schema.String, typeof Schema.Unknown>;
|
|
182
560
|
}>;
|
|
561
|
+
/**
|
|
562
|
+
* Tool definition.
|
|
563
|
+
* Used to register available functions that the LLM can call.
|
|
564
|
+
* Input schema is a JSON Schema object defining parameters.
|
|
565
|
+
*/
|
|
183
566
|
type ToolDefinition = Schema.Schema.Type<typeof ToolDefinitionSchema>;
|
|
567
|
+
/**
|
|
568
|
+
* Schema for tool invocation.
|
|
569
|
+
* Emitted by the model when it decides to call a tool.
|
|
570
|
+
*
|
|
571
|
+
* @example
|
|
572
|
+
* ```typescript
|
|
573
|
+
* const call: ToolCall = {
|
|
574
|
+
* id: "toolu_123",
|
|
575
|
+
* name: "file-read",
|
|
576
|
+
* input: { path: "./output.txt" }
|
|
577
|
+
* };
|
|
578
|
+
* ```
|
|
579
|
+
*/
|
|
184
580
|
declare const ToolCallSchema: Schema.Struct<{
|
|
581
|
+
/** Unique tool call identifier (generated by model) */
|
|
185
582
|
id: typeof Schema.String;
|
|
583
|
+
/** Tool name to invoke */
|
|
186
584
|
name: typeof Schema.String;
|
|
585
|
+
/** Tool input parameters (arbitrary JSON-compatible object) */
|
|
187
586
|
input: typeof Schema.Unknown;
|
|
188
587
|
}>;
|
|
588
|
+
/**
|
|
589
|
+
* Tool invocation from the LLM.
|
|
590
|
+
* When the model decides to call a tool, this describes which tool and with what inputs.
|
|
591
|
+
*/
|
|
189
592
|
type ToolCall = Schema.Schema.Type<typeof ToolCallSchema>;
|
|
593
|
+
/**
|
|
594
|
+
* Request to the LLM for a completion.
|
|
595
|
+
* Includes messages, model configuration, tool definitions, and sampling parameters.
|
|
596
|
+
* Passed to LLMService.complete() for non-streaming calls that return the full response at once.
|
|
597
|
+
*
|
|
598
|
+
* @see CompletionResponse — the response type returned by LLMService.complete()
|
|
599
|
+
* @see ToolDefinition — shape of entries in the `tools` array
|
|
600
|
+
* @see ModelConfig — shape of the `model` field
|
|
601
|
+
*
|
|
602
|
+
* @example
|
|
603
|
+
* ```typescript
|
|
604
|
+
* const request: CompletionRequest = {
|
|
605
|
+
* messages: [
|
|
606
|
+
* { role: "system", content: "You are a helpful assistant." },
|
|
607
|
+
* { role: "user", content: "What is the capital of France?" }
|
|
608
|
+
* ],
|
|
609
|
+
* model: { provider: "anthropic", model: "claude-opus-4-20250514" },
|
|
610
|
+
* maxTokens: 1024,
|
|
611
|
+
* temperature: 0.7,
|
|
612
|
+
* tools: [
|
|
613
|
+
* { name: "web-search", description: "Search the web", inputSchema: { query: { type: "string" } } }
|
|
614
|
+
* ]
|
|
615
|
+
* };
|
|
616
|
+
* ```
|
|
617
|
+
*/
|
|
190
618
|
type CompletionRequest = {
|
|
619
|
+
/** Conversation history (at least 1 message required) */
|
|
191
620
|
readonly messages: readonly LLMMessage[];
|
|
621
|
+
/** Model config (provider + model name + optional sampling params) */
|
|
192
622
|
readonly model?: ModelConfig;
|
|
623
|
+
/** Maximum response tokens (optional, uses config default if omitted) */
|
|
193
624
|
readonly maxTokens?: number;
|
|
625
|
+
/** Sampling temperature, typically 0.0-1.0; the accepted range is provider-specific (optional, uses config default if omitted) */
|
|
194
626
|
readonly temperature?: number;
|
|
627
|
+
/** Stop sequences to halt generation (optional) */
|
|
195
628
|
readonly stopSequences?: readonly string[];
|
|
629
|
+
/** Tools available for the model to call (optional) */
|
|
196
630
|
readonly tools?: readonly ToolDefinition[];
|
|
631
|
+
/** System prompt (optional, prepended to user messages) */
|
|
197
632
|
readonly systemPrompt?: string;
|
|
198
633
|
};
|
|
634
|
+
/**
|
|
635
|
+
* Schema for LLM response.
|
|
636
|
+
* Contains the generated content, stop reason, token usage, and any tool calls.
|
|
637
|
+
*
|
|
638
|
+
* @example
|
|
639
|
+
* ```typescript
|
|
640
|
+
* const response: CompletionResponse = {
|
|
641
|
+
* content: "The capital of France is Paris.",
|
|
642
|
+
* stopReason: "end_turn",
|
|
643
|
+
* usage: { inputTokens: 120, outputTokens: 15, totalTokens: 135, estimatedCost: 0.00041 },
|
|
644
|
+
* model: "claude-opus-4-20250514",
|
|
645
|
+
* toolCalls: undefined
|
|
646
|
+
* };
|
|
647
|
+
* ```
|
|
648
|
+
*/
|
|
199
649
|
declare const CompletionResponseSchema: Schema.Struct<{
|
|
650
|
+
/** Generated response content (text only, no content blocks) */
|
|
200
651
|
content: typeof Schema.String;
|
|
652
|
+
/** Why the model stopped generating */
|
|
201
653
|
stopReason: Schema.Literal<["end_turn", "max_tokens", "stop_sequence", "tool_use"]>;
|
|
654
|
+
/** Token usage statistics */
|
|
202
655
|
usage: Schema.Struct<{
|
|
656
|
+
/** Tokens consumed by the input (messages + system prompt) */
|
|
203
657
|
inputTokens: typeof Schema.Number;
|
|
658
|
+
/** Tokens generated in the response */
|
|
204
659
|
outputTokens: typeof Schema.Number;
|
|
660
|
+
/** Sum of input and output tokens */
|
|
205
661
|
totalTokens: typeof Schema.Number;
|
|
662
|
+
/** Estimated cost in USD based on provider pricing */
|
|
206
663
|
estimatedCost: typeof Schema.Number;
|
|
207
664
|
}>;
|
|
665
|
+
/** Actual model identifier used (may differ from request) */
|
|
208
666
|
model: typeof Schema.String;
|
|
667
|
+
/** Tool calls emitted by the model (if any) */
|
|
209
668
|
toolCalls: Schema.optional<Schema.Array$<Schema.Struct<{
|
|
669
|
+
/** Unique tool call identifier (generated by model) */
|
|
210
670
|
id: typeof Schema.String;
|
|
671
|
+
/** Tool name to invoke */
|
|
211
672
|
name: typeof Schema.String;
|
|
673
|
+
/** Tool input parameters (arbitrary JSON-compatible object) */
|
|
212
674
|
input: typeof Schema.Unknown;
|
|
213
675
|
}>>>;
|
|
214
676
|
}>;
|
|
677
|
+
/**
|
|
678
|
+
* LLM response to a completion request.
|
|
679
|
+
* Contains generated text, stop reason, usage metrics, and optional tool calls.
|
|
680
|
+
*
|
|
681
|
+
* @see CompletionRequest — the request type passed to LLMService.complete()
|
|
682
|
+
* @see StopReason — possible values for the `stopReason` field
|
|
683
|
+
* @see TokenUsage — shape of the `usage` field
|
|
684
|
+
* @see ToolCall — shape of entries in the optional `toolCalls` array
|
|
685
|
+
*/
|
|
215
686
|
type CompletionResponse = Schema.Schema.Type<typeof CompletionResponseSchema>;
|
|
687
|
+
/**
|
|
688
|
+
* Events streamed during an LLM response.
|
|
689
|
+
* Used when streaming responses rather than waiting for full completion.
|
|
690
|
+
* Events arrive in sequence: text_delta(s), then tool_use_start/delta(s) if applicable, then content_complete, then usage.
|
|
691
|
+
*
|
|
692
|
+
* @example
|
|
693
|
+
* ```typescript
|
|
694
|
+
* const events: StreamEvent[] = [
|
|
695
|
+
* { type: "text_delta", text: "The " },
|
|
696
|
+
* { type: "text_delta", text: "capital " },
|
|
697
|
+
* { type: "text_delta", text: "is Paris." },
|
|
698
|
+
* { type: "content_complete", content: "The capital is Paris." },
|
|
699
|
+
* { type: "usage", usage: { inputTokens: 50, outputTokens: 10, totalTokens: 60, estimatedCost: 0.00018 } }
|
|
700
|
+
* ];
|
|
701
|
+
* ```
|
|
702
|
+
*/
|
|
216
703
|
type StreamEvent = {
|
|
704
|
+
/** Text chunk arriving */
|
|
217
705
|
readonly type: "text_delta";
|
|
706
|
+
/** Text chunk content */
|
|
218
707
|
readonly text: string;
|
|
219
708
|
} | {
|
|
709
|
+
/** Tool invocation starting */
|
|
220
710
|
readonly type: "tool_use_start";
|
|
711
|
+
/** Unique tool call ID */
|
|
221
712
|
readonly id: string;
|
|
713
|
+
/** Tool name being invoked */
|
|
222
714
|
readonly name: string;
|
|
223
715
|
} | {
|
|
716
|
+
/** Tool input parameter chunk arriving */
|
|
224
717
|
readonly type: "tool_use_delta";
|
|
718
|
+
/** JSON parameter chunk (accumulated to form full input) */
|
|
225
719
|
readonly input: string;
|
|
226
720
|
} | {
|
|
721
|
+
/** Content generation completed */
|
|
227
722
|
readonly type: "content_complete";
|
|
723
|
+
/** Full accumulated response content */
|
|
228
724
|
readonly content: string;
|
|
229
725
|
} | {
|
|
726
|
+
/** Token usage reported */
|
|
230
727
|
readonly type: "usage";
|
|
728
|
+
/** Final token usage for the request */
|
|
231
729
|
readonly usage: TokenUsage;
|
|
232
730
|
} | {
|
|
731
|
+
/** Error occurred during streaming */
|
|
233
732
|
readonly type: "error";
|
|
733
|
+
/** Error message */
|
|
234
734
|
readonly error: string;
|
|
235
735
|
};
|
|
736
|
+
/**
|
|
737
|
+
* Completion request with structured output validation.
|
|
738
|
+
* Extends CompletionRequest to require the model output conform to a schema.
|
|
739
|
+
* Used when the agent needs guaranteed JSON schema output from the LLM.
|
|
740
|
+
*
|
|
741
|
+
* @see CompletionRequest — base request type this extends
|
|
742
|
+
*
|
|
743
|
+
* @typeParam A — The type that the LLM output must conform to
|
|
744
|
+
*
|
|
745
|
+
* @example
|
|
746
|
+
* ```typescript
|
|
747
|
+
* interface Decision {
|
|
748
|
+
* readonly choice: "yes" | "no";
|
|
749
|
+
* readonly confidence: number;
|
|
750
|
+
* }
|
|
751
|
+
*
|
|
752
|
+
* const request: StructuredCompletionRequest<Decision> = {
|
|
753
|
+
* messages: [{ role: "user", content: "Should I approve this?" }],
|
|
754
|
+
* outputSchema: Schema.Struct({
|
|
755
|
+
* choice: Schema.Literal("yes", "no"),
|
|
756
|
+
* confidence: Schema.Number
|
|
757
|
+
* }),
|
|
758
|
+
* maxParseRetries: 2
|
|
759
|
+
* };
|
|
760
|
+
* ```
|
|
761
|
+
*/
|
|
236
762
|
type StructuredCompletionRequest<A> = CompletionRequest & {
|
|
763
|
+
/** Schema that the LLM response must conform to */
|
|
237
764
|
readonly outputSchema: Schema.Schema<A>;
|
|
765
|
+
/** If true, retry with corrected prompt if parse fails (default: false) */
|
|
238
766
|
readonly retryOnParseFail?: boolean;
|
|
767
|
+
/** Maximum parse retry attempts before giving up (default: 1) */
|
|
239
768
|
readonly maxParseRetries?: number;
|
|
240
769
|
};
|
|
241
|
-
|
|
770
|
+
/**
|
|
771
|
+
* Strategy for truncating context when it exceeds token budget.
|
|
772
|
+
* Used by ContextWindowManager when compacting message history for token limits.
|
|
773
|
+
*
|
|
774
|
+
* @example
|
|
775
|
+
* ```typescript
|
|
776
|
+
* const strategy: TruncationStrategy = "summarize-middle";
|
|
777
|
+
* ```
|
|
778
|
+
*/
|
|
779
|
+
type TruncationStrategy =
|
|
780
|
+
/** Remove oldest messages first (FIFO). Fastest; may lose early context. */
|
|
781
|
+
"drop-oldest"
|
|
782
|
+
/** Summarize middle messages, preserving system prompt and most recent turns. */
|
|
783
|
+
| "summarize-middle"
|
|
784
|
+
/** Keep only the most recent N messages; drops all prior history. */
|
|
785
|
+
| "sliding-window"
|
|
786
|
+
/** Use heuristics to score and drop least-important messages first. */
|
|
787
|
+
| "importance-based";
|
|
788
|
+
/**
|
|
789
|
+
* Observability verbosity level for LLM request events.
|
|
790
|
+
* Controls what is captured in each `LLMRequestEvent` published to the EventBus.
|
|
791
|
+
*
|
|
792
|
+
* @default "full"
|
|
793
|
+
*
|
|
794
|
+
* @example
|
|
795
|
+
* ```typescript
|
|
796
|
+
* const config = LLMConfig.of({
|
|
797
|
+
* // ... other fields
|
|
798
|
+
* observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
|
|
799
|
+
* });
|
|
800
|
+
* ```
|
|
801
|
+
*/
|
|
802
|
+
type ObservabilityVerbosity =
|
|
803
|
+
/** Capture timing, token counts, and cost only — lightweight, production-safe. */
|
|
804
|
+
"metadata"
|
|
805
|
+
/** Capture complete request/response payloads — higher overhead, useful for debugging. */
|
|
806
|
+
| "full";
|
|
242
807
|
|
|
243
808
|
declare const LLMError_base: new <A extends Record<string, any> = {}>(args: effect_Types.Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => effect_Cause.YieldableError & {
|
|
244
809
|
readonly _tag: "LLMError";
|
|
@@ -348,64 +913,297 @@ declare class LLMService extends LLMService_base {
|
|
|
348
913
|
}
|
|
349
914
|
|
|
350
915
|
declare const LLMConfig_base: Context.TagClass<LLMConfig, "LLMConfig", {
|
|
916
|
+
/**
|
|
917
|
+
* Default LLM provider.
|
|
918
|
+
* Used as fallback when a request does not specify a provider.
|
|
919
|
+
*
|
|
920
|
+
* @default "anthropic"
|
|
921
|
+
*/
|
|
351
922
|
readonly defaultProvider: LLMProvider;
|
|
923
|
+
/**
|
|
924
|
+
* Default LLM model identifier.
|
|
925
|
+
* Used as fallback when a request does not specify a model.
|
|
926
|
+
*
|
|
927
|
+
* @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
|
|
928
|
+
*/
|
|
352
929
|
readonly defaultModel: string;
|
|
930
|
+
/**
|
|
931
|
+
* Anthropic API key.
|
|
932
|
+
* Retrieved from ANTHROPIC_API_KEY environment variable.
|
|
933
|
+
* Required if provider is "anthropic".
|
|
934
|
+
*
|
|
935
|
+
* @default From ANTHROPIC_API_KEY env var (undefined if not set)
|
|
936
|
+
*/
|
|
353
937
|
readonly anthropicApiKey?: string;
|
|
938
|
+
/**
|
|
939
|
+
* OpenAI API key.
|
|
940
|
+
* Retrieved from OPENAI_API_KEY environment variable.
|
|
941
|
+
* Required if provider is "openai".
|
|
942
|
+
*
|
|
943
|
+
* @default From OPENAI_API_KEY env var (undefined if not set)
|
|
944
|
+
*/
|
|
354
945
|
readonly openaiApiKey?: string;
|
|
946
|
+
/**
|
|
947
|
+
* Google API key.
|
|
948
|
+
* Retrieved from GOOGLE_API_KEY environment variable.
|
|
949
|
+
* Required if provider is "gemini".
|
|
950
|
+
*
|
|
951
|
+
* @default From GOOGLE_API_KEY env var (undefined if not set)
|
|
952
|
+
*/
|
|
355
953
|
readonly googleApiKey?: string;
|
|
954
|
+
/**
|
|
955
|
+
* Ollama server endpoint.
|
|
956
|
+
* Retrieved from OLLAMA_ENDPOINT environment variable.
|
|
957
|
+
* Used for local model serving.
|
|
958
|
+
*
|
|
959
|
+
* @default "http://localhost:11434"
|
|
960
|
+
*/
|
|
356
961
|
readonly ollamaEndpoint?: string;
|
|
357
962
|
/**
|
|
358
|
-
* Embedding configuration
|
|
359
|
-
* embeddings route to OpenAI
|
|
360
|
-
* This is the
|
|
963
|
+
* Embedding configuration — model, provider, dimensions.
|
|
964
|
+
* Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
|
|
965
|
+
* This is the sole embedding config for the entire framework.
|
|
966
|
+
* Used by semantic cache, memory similarity search, and verification layers.
|
|
967
|
+
*
|
|
968
|
+
* @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
|
|
361
969
|
*/
|
|
362
970
|
readonly embeddingConfig: EmbeddingConfig;
|
|
363
971
|
/**
|
|
364
972
|
* Enable Anthropic prompt caching.
|
|
365
|
-
* When true, memory context injections are wrapped in
|
|
366
|
-
* `cache_control: { type: "ephemeral" }` blocks.
|
|
973
|
+
* When true, memory context injections and system prompts are wrapped in
|
|
974
|
+
* `cache_control: { type: "ephemeral" }` blocks to reduce costs.
|
|
975
|
+
* Non-Anthropic providers silently ignore cache control directives.
|
|
976
|
+
* Automatically set to true if defaultModel starts with "claude".
|
|
977
|
+
*
|
|
978
|
+
* @default true if defaultModel starts with "claude", false otherwise
|
|
367
979
|
*/
|
|
368
980
|
readonly supportsPromptCaching: boolean;
|
|
981
|
+
/**
|
|
982
|
+
* Maximum number of retries for transient LLM request failures.
|
|
983
|
+
* Applied with exponential backoff (2^n seconds between attempts).
|
|
984
|
+
*
|
|
985
|
+
* @default 3
|
|
986
|
+
*/
|
|
369
987
|
readonly maxRetries: number;
|
|
988
|
+
/**
|
|
989
|
+
* Request timeout in milliseconds.
|
|
990
|
+
* LLM requests exceeding this duration are aborted.
|
|
991
|
+
*
|
|
992
|
+
* @default 30000 (30 seconds)
|
|
993
|
+
*/
|
|
370
994
|
readonly timeoutMs: number;
|
|
995
|
+
/**
|
|
996
|
+
* Default maximum output tokens for LLM responses.
|
|
997
|
+
* Used if a CompletionRequest does not specify maxTokens.
|
|
998
|
+
* Set lower for faster responses; higher for longer outputs.
|
|
999
|
+
*
|
|
1000
|
+
* @default 4096
|
|
1001
|
+
*/
|
|
371
1002
|
readonly defaultMaxTokens: number;
|
|
1003
|
+
/**
|
|
1004
|
+
* Default sampling temperature (0.0-1.0).
|
|
1005
|
+
* Used if a CompletionRequest does not specify temperature.
|
|
1006
|
+
* 0.0 = deterministic; 1.0 = maximum randomness.
|
|
1007
|
+
*
|
|
1008
|
+
* @default 0.7 (good balance of creativity and coherence)
|
|
1009
|
+
*/
|
|
372
1010
|
readonly defaultTemperature: number;
|
|
1011
|
+
/**
|
|
1012
|
+
* LLM request/response observability verbosity.
|
|
1013
|
+
* Determines what data is captured in LLMRequestEvent for observability.
|
|
1014
|
+
*
|
|
1015
|
+
* - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
|
|
1016
|
+
* - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
|
|
1017
|
+
*
|
|
1018
|
+
* @default "full" (capture everything)
|
|
1019
|
+
*
|
|
1020
|
+
* @example
|
|
1021
|
+
* ```typescript
|
|
1022
|
+
* // Development: full details
|
|
1023
|
+
* observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
|
|
1024
|
+
* ```
|
|
1025
|
+
*/
|
|
1026
|
+
readonly observabilityVerbosity: ObservabilityVerbosity;
|
|
373
1027
|
}>;
|
|
374
1028
|
/**
|
|
375
|
-
* LLM configuration
|
|
1029
|
+
* LLM service configuration.
|
|
1030
|
+
* Provides API keys, default model settings, timeouts, and observability verbosity.
|
|
1031
|
+
* Typically constructed from environment variables via llmConfigFromEnv.
|
|
1032
|
+
*
|
|
1033
|
+
* @example
|
|
1034
|
+
* ```typescript
|
|
1035
|
+
* const config = LLMConfig.of({
|
|
1036
|
+
* defaultProvider: "anthropic",
|
|
1037
|
+
* defaultModel: "claude-opus-4-20250514",
|
|
1038
|
+
* anthropicApiKey: process.env.ANTHROPIC_API_KEY,
|
|
1039
|
+
* maxRetries: 3,
|
|
1040
|
+
* timeoutMs: 30000
|
|
1041
|
+
* });
|
|
1042
|
+
* ```
|
|
376
1043
|
*/
|
|
377
1044
|
declare class LLMConfig extends LLMConfig_base {
|
|
378
1045
|
}
|
|
379
1046
|
/**
|
|
380
|
-
* Raw LLMConfig
|
|
381
|
-
*
|
|
1047
|
+
* Raw LLMConfig object constructed from environment variables.
|
|
1048
|
+
* Reads all config from process.env with sensible defaults.
|
|
1049
|
+
* Exported so callers can spread overrides (e.g. change model) on top.
|
|
1050
|
+
*
|
|
1051
|
+
* Environment variables:
|
|
1052
|
+
* - LLM_DEFAULT_MODEL: Model identifier (default: claude-sonnet-4-20250514)
|
|
1053
|
+
* - ANTHROPIC_API_KEY: Anthropic API key
|
|
1054
|
+
* - OPENAI_API_KEY: OpenAI API key
|
|
1055
|
+
* - GOOGLE_API_KEY: Google API key
|
|
1056
|
+
* - OLLAMA_ENDPOINT: Ollama server URL (default: http://localhost:11434)
|
|
1057
|
+
* - EMBEDDING_MODEL: Embedding model name (default: text-embedding-3-small)
|
|
1058
|
+
* - EMBEDDING_DIMENSIONS: Embedding vector dimensions (default: 1536)
|
|
1059
|
+
* - EMBEDDING_PROVIDER: Embedding provider (default: openai)
|
|
1060
|
+
* - LLM_MAX_RETRIES: Retry attempts (default: 3)
|
|
1061
|
+
* - LLM_TIMEOUT_MS: Request timeout in ms (default: 30000)
|
|
1062
|
+
* - LLM_DEFAULT_TEMPERATURE: Sampling temperature (default: 0.7)
|
|
1063
|
+
* - LLM_OBSERVABILITY_VERBOSITY: "full" or "metadata" (default: full)
|
|
1064
|
+
*
|
|
1065
|
+
* @example
|
|
1066
|
+
* ```typescript
|
|
1067
|
+
* // Use defaults from environment
|
|
1068
|
+
* const config = llmConfigFromEnv;
|
|
1069
|
+
*
|
|
1070
|
+
* // Override specific fields
|
|
1071
|
+
* const customConfig = LLMConfig.of({
|
|
1072
|
+
* ...llmConfigFromEnv,
|
|
1073
|
+
* defaultModel: "gpt-4o",
|
|
1074
|
+
* defaultProvider: "openai"
|
|
1075
|
+
* });
|
|
1076
|
+
* ```
|
|
382
1077
|
*/
|
|
383
1078
|
declare const llmConfigFromEnv: {
|
|
1079
|
+
/**
|
|
1080
|
+
* Default LLM provider.
|
|
1081
|
+
* Used as fallback when a request does not specify a provider.
|
|
1082
|
+
*
|
|
1083
|
+
* @default "anthropic"
|
|
1084
|
+
*/
|
|
384
1085
|
readonly defaultProvider: LLMProvider;
|
|
1086
|
+
/**
|
|
1087
|
+
* Default LLM model identifier.
|
|
1088
|
+
* Used as fallback when a request does not specify a model.
|
|
1089
|
+
*
|
|
1090
|
+
* @default From LLM_DEFAULT_MODEL env var, falls back to "claude-sonnet-4-20250514"
|
|
1091
|
+
*/
|
|
385
1092
|
readonly defaultModel: string;
|
|
1093
|
+
/**
|
|
1094
|
+
* Anthropic API key.
|
|
1095
|
+
* Retrieved from ANTHROPIC_API_KEY environment variable.
|
|
1096
|
+
* Required if provider is "anthropic".
|
|
1097
|
+
*
|
|
1098
|
+
* @default From ANTHROPIC_API_KEY env var (undefined if not set)
|
|
1099
|
+
*/
|
|
386
1100
|
readonly anthropicApiKey?: string;
|
|
1101
|
+
/**
|
|
1102
|
+
* OpenAI API key.
|
|
1103
|
+
* Retrieved from OPENAI_API_KEY environment variable.
|
|
1104
|
+
* Required if provider is "openai".
|
|
1105
|
+
*
|
|
1106
|
+
* @default From OPENAI_API_KEY env var (undefined if not set)
|
|
1107
|
+
*/
|
|
387
1108
|
readonly openaiApiKey?: string;
|
|
1109
|
+
/**
|
|
1110
|
+
* Google API key.
|
|
1111
|
+
* Retrieved from GOOGLE_API_KEY environment variable.
|
|
1112
|
+
* Required if provider is "gemini".
|
|
1113
|
+
*
|
|
1114
|
+
* @default From GOOGLE_API_KEY env var (undefined if not set)
|
|
1115
|
+
*/
|
|
388
1116
|
readonly googleApiKey?: string;
|
|
1117
|
+
/**
|
|
1118
|
+
* Ollama server endpoint.
|
|
1119
|
+
* Retrieved from OLLAMA_ENDPOINT environment variable.
|
|
1120
|
+
* Used for local model serving.
|
|
1121
|
+
*
|
|
1122
|
+
* @default "http://localhost:11434"
|
|
1123
|
+
*/
|
|
389
1124
|
readonly ollamaEndpoint?: string;
|
|
390
1125
|
/**
|
|
391
|
-
* Embedding configuration
|
|
392
|
-
* embeddings route to OpenAI
|
|
393
|
-
* This is the
|
|
1126
|
+
* Embedding configuration — model, provider, dimensions.
|
|
1127
|
+
* Anthropic has no embeddings API; embeddings always route to OpenAI or Ollama.
|
|
1128
|
+
* This is the sole embedding config for the entire framework.
|
|
1129
|
+
* Used by semantic cache, memory similarity search, and verification layers.
|
|
1130
|
+
*
|
|
1131
|
+
* @default { model: "text-embedding-3-small", dimensions: 1536, provider: "openai", batchSize: 100 }
|
|
394
1132
|
*/
|
|
395
1133
|
readonly embeddingConfig: EmbeddingConfig;
|
|
396
1134
|
/**
|
|
397
1135
|
* Enable Anthropic prompt caching.
|
|
398
|
-
* When true, memory context injections are wrapped in
|
|
399
|
-
* `cache_control: { type: "ephemeral" }` blocks.
|
|
1136
|
+
* When true, memory context injections and system prompts are wrapped in
|
|
1137
|
+
* `cache_control: { type: "ephemeral" }` blocks to reduce costs.
|
|
1138
|
+
* Non-Anthropic providers silently ignore cache control directives.
|
|
1139
|
+
* Automatically set to true if defaultModel starts with "claude".
|
|
1140
|
+
*
|
|
1141
|
+
* @default true if defaultModel starts with "claude", false otherwise
|
|
400
1142
|
*/
|
|
401
1143
|
readonly supportsPromptCaching: boolean;
|
|
1144
|
+
/**
|
|
1145
|
+
* Maximum number of retries for transient LLM request failures.
|
|
1146
|
+
* Applied with exponential backoff (2^n seconds between attempts).
|
|
1147
|
+
*
|
|
1148
|
+
* @default 3
|
|
1149
|
+
*/
|
|
402
1150
|
readonly maxRetries: number;
|
|
1151
|
+
/**
|
|
1152
|
+
* Request timeout in milliseconds.
|
|
1153
|
+
* LLM requests exceeding this duration are aborted.
|
|
1154
|
+
*
|
|
1155
|
+
* @default 30000 (30 seconds)
|
|
1156
|
+
*/
|
|
403
1157
|
readonly timeoutMs: number;
|
|
1158
|
+
/**
|
|
1159
|
+
* Default maximum output tokens for LLM responses.
|
|
1160
|
+
* Used if a CompletionRequest does not specify maxTokens.
|
|
1161
|
+
* Set lower for faster responses; higher for longer outputs.
|
|
1162
|
+
*
|
|
1163
|
+
* @default 4096
|
|
1164
|
+
*/
|
|
404
1165
|
readonly defaultMaxTokens: number;
|
|
1166
|
+
/**
|
|
1167
|
+
* Default sampling temperature (0.0-1.0).
|
|
1168
|
+
* Used if a CompletionRequest does not specify temperature.
|
|
1169
|
+
* 0.0 = deterministic; 1.0 = maximum randomness.
|
|
1170
|
+
*
|
|
1171
|
+
* @default 0.7 (good balance of creativity and coherence)
|
|
1172
|
+
*/
|
|
405
1173
|
readonly defaultTemperature: number;
|
|
1174
|
+
/**
|
|
1175
|
+
* LLM request/response observability verbosity.
|
|
1176
|
+
* Determines what data is captured in LLMRequestEvent for observability.
|
|
1177
|
+
*
|
|
1178
|
+
* - **"full"**: Capture complete request/response payloads (useful for debugging, higher overhead)
|
|
1179
|
+
* - **"metadata"**: Capture only timing, token counts, and cost (lightweight, production-safe)
|
|
1180
|
+
*
|
|
1181
|
+
* @default "full" (capture everything)
|
|
1182
|
+
*
|
|
1183
|
+
* @example
|
|
1184
|
+
* ```typescript
|
|
1185
|
+
* // Development: full details
|
|
1186
|
+
* observabilityVerbosity: process.env.NODE_ENV === "production" ? "metadata" : "full"
|
|
1187
|
+
* ```
|
|
1188
|
+
*/
|
|
1189
|
+
readonly observabilityVerbosity: ObservabilityVerbosity;
|
|
406
1190
|
};
|
|
407
1191
|
/**
|
|
408
|
-
*
|
|
1192
|
+
* Effect-TS Layer that provides LLMConfig from environment variables.
|
|
1193
|
+
* Use this layer to automatically populate LLMConfig from process.env.
|
|
1194
|
+
* Can be overridden with a custom layer for testing or custom configuration.
|
|
1195
|
+
*
|
|
1196
|
+
* @example
|
|
1197
|
+
* ```typescript
|
|
1198
|
+
* const effect = Effect.gen(function* () {
|
|
1199
|
+
* const config = yield* LLMConfig;
|
|
1200
|
+
* console.log(config.defaultModel);
|
|
1201
|
+
* }).pipe(Effect.provide(LLMConfigFromEnv));
|
|
1202
|
+
*
|
|
1203
|
+
* Effect.runPromise(effect);
|
|
1204
|
+
* ```
|
|
1205
|
+
*
|
|
1206
|
+
* @see llmConfigFromEnv
|
|
409
1207
|
*/
|
|
410
1208
|
declare const LLMConfigFromEnv: Layer.Layer<LLMConfig, never, never>;
|
|
411
1209
|
|
|
@@ -447,6 +1245,8 @@ declare const LocalProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
|
|
|
447
1245
|
|
|
448
1246
|
declare const GeminiProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
|
|
449
1247
|
|
|
1248
|
+
declare const LiteLLMProviderLive: Layer.Layer<LLMService, never, LLMConfig>;
|
|
1249
|
+
|
|
450
1250
|
/**
|
|
451
1251
|
* Create a deterministic test LLM service.
|
|
452
1252
|
* Returns responses based on pattern matching against prompt content.
|
|
@@ -565,10 +1365,10 @@ type ComplexityAnalysis = Schema.Schema.Type<typeof ComplexityAnalysisSchema>;
|
|
|
565
1365
|
* Create the LLM provider layer for a specific provider.
|
|
566
1366
|
* Uses env vars for configuration by default.
|
|
567
1367
|
*/
|
|
568
|
-
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1368
|
+
declare const createLLMProviderLayer: (provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm" | "test", testResponses?: Record<string, string>, model?: string) => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
569
1369
|
/**
|
|
570
1370
|
* LLM layer with custom config (for programmatic use).
|
|
571
1371
|
*/
|
|
572
|
-
declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini") => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
1372
|
+
declare const createLLMProviderLayerWithConfig: (config: typeof LLMConfig.Service, provider?: "anthropic" | "openai" | "ollama" | "gemini" | "litellm") => Layer.Layer<LLMService | PromptManager, never, never>;
|
|
573
1373
|
|
|
574
|
-
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
|
|
1374
|
+
export { AnthropicProviderLive, type CacheControl, CacheControlSchema, type CacheableContentBlock, type CompletionRequest, type CompletionResponse, CompletionResponseSchema, type ComplexityAnalysis, ComplexityAnalysisSchema, type ContentBlock, DefaultEmbeddingConfig, type EmbeddingConfig, EmbeddingConfigSchema, GeminiProviderLive, ImageContentBlockSchema, type ImageSource, ImageSourceSchema, LLMConfig, LLMConfigFromEnv, LLMContextOverflowError, LLMError, type LLMErrors, type LLMMessage, LLMParseError, type LLMProvider, LLMProviderType, LLMRateLimitError, LLMService, LLMTimeoutError, LiteLLMProviderLive, LocalProviderLive, type ModelConfig, ModelConfigSchema, type ModelPresetName, ModelPresets, OpenAIProviderLive, type Plan, PlanSchema, PromptManager, PromptManagerLive, type ReActAction, ReActActionSchema, type Reflection, ReflectionSchema, type StopReason, StopReasonSchema, type StrategySelection, StrategySelectionSchema, type StreamEvent, type StructuredCompletionRequest, TestLLMService, TestLLMServiceLayer, TextContentBlockSchema, type ThoughtEvaluation, ThoughtEvaluationSchema, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type ToolDefinition, ToolDefinitionSchema, ToolResultContentBlockSchema, ToolUseContentBlockSchema, type TruncationStrategy, calculateCost, createLLMProviderLayer, createLLMProviderLayerWithConfig, estimateTokenCount, llmConfigFromEnv, makeCacheable, retryPolicy };
|