@reactive-agents/llm-provider 0.1.0

package/dist/index.js ADDED
@@ -0,0 +1,1683 @@
1
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
+ }) : x)(function(x) {
4
+ if (typeof require !== "undefined") return require.apply(this, arguments);
5
+ throw Error('Dynamic require of "' + x + '" is not supported');
6
+ });
7
+
8
+ // src/types.ts
9
+ import { Schema } from "effect";
10
+ var LLMProviderType = Schema.Literal(
11
+ "anthropic",
12
+ "openai",
13
+ "ollama",
14
+ "gemini",
15
+ "custom"
16
+ );
17
+ var EmbeddingConfigSchema = Schema.Struct({
18
+ model: Schema.String,
19
+ dimensions: Schema.Number,
20
+ provider: Schema.Literal("openai", "ollama"),
21
+ batchSize: Schema.optional(Schema.Number)
22
+ });
23
+ var DefaultEmbeddingConfig = {
24
+ model: "text-embedding-3-small",
25
+ dimensions: 1536,
26
+ provider: "openai",
27
+ batchSize: 100
28
+ };
29
+ var ModelConfigSchema = Schema.Struct({
30
+ provider: LLMProviderType,
31
+ model: Schema.String,
32
+ maxTokens: Schema.optional(Schema.Number),
33
+ temperature: Schema.optional(Schema.Number),
34
+ topP: Schema.optional(Schema.Number),
35
+ stopSequences: Schema.optional(Schema.Array(Schema.String))
36
+ });
37
+ var ModelPresets = {
38
+ "claude-haiku": {
39
+ provider: "anthropic",
40
+ model: "claude-3-5-haiku-20241022",
41
+ costPer1MInput: 1,
42
+ costPer1MOutput: 5,
43
+ maxContext: 2e5,
44
+ quality: 0.6
45
+ },
46
+ "claude-sonnet": {
47
+ provider: "anthropic",
48
+ model: "claude-sonnet-4-20250514",
49
+ costPer1MInput: 3,
50
+ costPer1MOutput: 15,
51
+ maxContext: 2e5,
52
+ quality: 0.85
53
+ },
54
+ "claude-sonnet-4-5": {
55
+ provider: "anthropic",
56
+ model: "claude-sonnet-4-5-20250929",
57
+ costPer1MInput: 3,
58
+ costPer1MOutput: 15,
59
+ maxContext: 2e5,
60
+ quality: 0.9
61
+ },
62
+ "claude-opus": {
63
+ provider: "anthropic",
64
+ model: "claude-opus-4-20250514",
65
+ costPer1MInput: 15,
66
+ costPer1MOutput: 75,
67
+ maxContext: 1e6,
68
+ quality: 1
69
+ },
70
+ "gpt-4o-mini": {
71
+ provider: "openai",
72
+ model: "gpt-4o-mini",
73
+ costPer1MInput: 0.15,
74
+ costPer1MOutput: 0.6,
75
+ maxContext: 128e3,
76
+ quality: 0.55
77
+ },
78
+ "gpt-4o": {
79
+ provider: "openai",
80
+ model: "gpt-4o",
81
+ costPer1MInput: 2.5,
82
+ costPer1MOutput: 10,
83
+ maxContext: 128e3,
84
+ quality: 0.8
85
+ },
86
+ "gemini-2.0-flash": {
87
+ provider: "gemini",
88
+ model: "gemini-2.0-flash",
89
+ costPer1MInput: 0.1,
90
+ costPer1MOutput: 0.4,
91
+ maxContext: 1e6,
92
+ quality: 0.75
93
+ },
94
+ "gemini-2.5-pro": {
95
+ provider: "gemini",
96
+ model: "gemini-2.5-pro-preview-03-25",
97
+ costPer1MInput: 1.25,
98
+ costPer1MOutput: 10,
99
+ maxContext: 1e6,
100
+ quality: 0.95
101
+ }
102
+ };
103
+ var CacheControlSchema = Schema.Struct({
104
+ type: Schema.Literal("ephemeral")
105
+ });
106
+ var ImageSourceSchema = Schema.Struct({
107
+ type: Schema.Literal("base64", "url"),
108
+ media_type: Schema.Literal(
109
+ "image/png",
110
+ "image/jpeg",
111
+ "image/gif",
112
+ "image/webp"
113
+ ),
114
+ data: Schema.String
115
+ });
116
+ var TextContentBlockSchema = Schema.Struct({
117
+ type: Schema.Literal("text"),
118
+ text: Schema.String,
119
+ cache_control: Schema.optional(CacheControlSchema)
120
+ });
121
+ var ImageContentBlockSchema = Schema.Struct({
122
+ type: Schema.Literal("image"),
123
+ source: ImageSourceSchema
124
+ });
125
+ var ToolUseContentBlockSchema = Schema.Struct({
126
+ type: Schema.Literal("tool_use"),
127
+ id: Schema.String,
128
+ name: Schema.String,
129
+ input: Schema.Unknown
130
+ });
131
+ var ToolResultContentBlockSchema = Schema.Struct({
132
+ type: Schema.Literal("tool_result"),
133
+ tool_use_id: Schema.String,
134
+ content: Schema.String
135
+ });
136
+ var makeCacheable = (text) => ({
137
+ type: "text",
138
+ text,
139
+ cache_control: { type: "ephemeral" }
140
+ });
141
+ var TokenUsageSchema = Schema.Struct({
142
+ inputTokens: Schema.Number,
143
+ outputTokens: Schema.Number,
144
+ totalTokens: Schema.Number,
145
+ estimatedCost: Schema.Number
146
+ });
147
+ var StopReasonSchema = Schema.Literal(
148
+ "end_turn",
149
+ "max_tokens",
150
+ "stop_sequence",
151
+ "tool_use"
152
+ );
153
+ var ToolDefinitionSchema = Schema.Struct({
154
+ name: Schema.String,
155
+ description: Schema.String,
156
+ inputSchema: Schema.Record({ key: Schema.String, value: Schema.Unknown })
157
+ });
158
+ var ToolCallSchema = Schema.Struct({
159
+ id: Schema.String,
160
+ name: Schema.String,
161
+ input: Schema.Unknown
162
+ });
163
+ var CompletionResponseSchema = Schema.Struct({
164
+ content: Schema.String,
165
+ stopReason: StopReasonSchema,
166
+ usage: TokenUsageSchema,
167
+ model: Schema.String,
168
+ toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
169
+ });
170
+
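// Usage sketch: validating a caller-supplied model configuration against
// ModelConfigSchema, seeded from the ModelPresets table above. Assumes
// effect's Schema.decodeUnknownSync; the variable names are illustrative.
import { Schema } from "effect";

const preset = ModelPresets["claude-sonnet"];
const modelConfig = Schema.decodeUnknownSync(ModelConfigSchema)({
  provider: preset.provider,
  model: preset.model,
  maxTokens: 4096,
  temperature: 0.7
});
// modelConfig.provider === "anthropic", modelConfig.model === "claude-sonnet-4-20250514"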
171
+ // src/errors.ts
172
+ import { Data } from "effect";
173
+ var LLMError = class extends Data.TaggedError("LLMError") {
174
+ };
175
+ var LLMRateLimitError = class extends Data.TaggedError("LLMRateLimitError") {
176
+ };
177
+ var LLMTimeoutError = class extends Data.TaggedError("LLMTimeoutError") {
178
+ };
179
+ var LLMParseError = class extends Data.TaggedError("LLMParseError") {
180
+ };
181
+ var LLMContextOverflowError = class extends Data.TaggedError(
182
+ "LLMContextOverflowError"
183
+ ) {
184
+ };
185
+
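// Usage sketch: because these are Data.TaggedError classes, callers can
// branch on failures with Effect.catchTags. The handler bodies below are
// illustrative placeholders.
import { Effect } from "effect";

const withFallbacks = (program) =>
  program.pipe(
    Effect.catchTags({
      LLMRateLimitError: (e) =>
        Effect.logWarning(`rate limited; retry after ${e.retryAfterMs}ms`),
      LLMTimeoutError: (e) =>
        Effect.logWarning(`timed out after ${e.timeoutMs}ms`),
      LLMParseError: (e) =>
        Effect.logError(`unparseable structured output: ${e.message}`)
    })
  );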
186
+ // src/llm-service.ts
187
+ import { Context } from "effect";
188
+ var LLMService = class extends Context.Tag("LLMService")() {
189
+ };
190
+
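// Usage sketch: LLMService is a Context.Tag, so consumers resolve it inside
// Effect.gen and call its methods. The request and response field names
// (messages, systemPrompt, content) follow what the provider layers later in
// this file read and return; `ask` is an illustrative helper.
import { Effect } from "effect";

const ask = (question) =>
  Effect.gen(function* () {
    const llm = yield* LLMService;
    const response = yield* llm.complete({
      systemPrompt: "You are a concise assistant.",
      messages: [{ role: "user", content: question }]
    });
    return response.content;
  });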
191
+ // src/llm-config.ts
192
+ import { Context as Context2, Layer } from "effect";
193
+ var LLMConfig = class extends Context2.Tag("LLMConfig")() {
194
+ };
195
+ var LLMConfigFromEnv = Layer.succeed(
196
+ LLMConfig,
197
+ LLMConfig.of({
198
+ defaultProvider: "anthropic",
199
+ defaultModel: process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514",
200
+ anthropicApiKey: process.env.ANTHROPIC_API_KEY,
201
+ openaiApiKey: process.env.OPENAI_API_KEY,
202
+ googleApiKey: process.env.GOOGLE_API_KEY,
203
+ ollamaEndpoint: process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434",
204
+ embeddingConfig: {
205
+ model: process.env.EMBEDDING_MODEL ?? "text-embedding-3-small",
206
+ dimensions: Number(process.env.EMBEDDING_DIMENSIONS ?? 1536),
207
+ provider: process.env.EMBEDDING_PROVIDER ?? "openai",
208
+ batchSize: 100
209
+ },
210
+ supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514").startsWith("claude"),
211
+ maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
212
+ timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
213
+ defaultMaxTokens: 4096,
214
+ defaultTemperature: Number(process.env.LLM_DEFAULT_TEMPERATURE ?? 0.7)
215
+ })
216
+ );
217
+
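// Usage sketch: supplying an explicit configuration layer instead of
// LLMConfigFromEnv. The field names mirror the object built above; the
// concrete values here are placeholders.
import { Layer } from "effect";

const LLMConfigExplicit = Layer.succeed(
  LLMConfig,
  LLMConfig.of({
    defaultProvider: "anthropic",
    defaultModel: "claude-3-5-haiku-20241022",
    anthropicApiKey: process.env.ANTHROPIC_API_KEY,
    openaiApiKey: undefined,
    googleApiKey: undefined,
    ollamaEndpoint: "http://localhost:11434",
    embeddingConfig: DefaultEmbeddingConfig,
    supportsPromptCaching: true,
    maxRetries: 3,
    timeoutMs: 30000,
    defaultMaxTokens: 4096,
    defaultTemperature: 0.2
  })
);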
218
+ // src/prompt-manager.ts
219
+ import { Effect as Effect3, Context as Context3, Layer as Layer2 } from "effect";
220
+
221
+ // src/token-counter.ts
222
+ import { Effect as Effect2 } from "effect";
223
+ var estimateTokenCount = (messages) => Effect2.sync(() => {
224
+ let totalChars = 0;
225
+ for (const msg of messages) {
226
+ if (typeof msg.content === "string") {
227
+ totalChars += msg.content.length;
228
+ } else {
229
+ for (const block of msg.content) {
230
+ if (block.type === "text") {
231
+ totalChars += block.text.length;
232
+ } else if (block.type === "tool_result") {
233
+ totalChars += block.content.length;
234
+ } else if (block.type === "tool_use") {
235
+ totalChars += JSON.stringify(block.input).length;
236
+ }
237
+ }
238
+ }
239
+ totalChars += 16;
240
+ }
241
+ return Math.ceil(totalChars / 4);
242
+ });
243
+ var calculateCost = (inputTokens, outputTokens, model) => {
244
+ const costMap = {
245
+ "claude-3-5-haiku-20241022": { input: 1, output: 5 },
246
+ "claude-sonnet-4-20250514": { input: 3, output: 15 },
247
+ "claude-sonnet-4-5-20250929": { input: 3, output: 15 },
248
+ "claude-opus-4-20250514": { input: 15, output: 75 },
249
+ "gpt-4o-mini": { input: 0.15, output: 0.6 },
250
+ "gpt-4o": { input: 2.5, output: 10 },
251
+ "gemini-2.0-flash": { input: 0.1, output: 0.4 },
252
+ "gemini-2.5-pro-preview-03-25": { input: 1.25, output: 10 },
253
+ "gemini-embedding-001": { input: 0, output: 0 }
254
+ };
255
+ const costs = costMap[model] ?? { input: 3, output: 15 };
256
+ return inputTokens / 1e6 * costs.input + outputTokens / 1e6 * costs.output;
257
+ };
258
+
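// Worked example: the estimator above charges message characters / 4 plus a
// flat 16-character per-message overhead, and calculateCost applies
// per-million-token pricing from its cost map.
import { Effect } from "effect";

const approxTokens = Effect.runSync(
  estimateTokenCount([{ role: "user", content: "Hello world" }])
); // (11 + 16) / 4, rounded up => 7

const dollars = calculateCost(1_000_000, 100_000, "claude-sonnet-4-20250514");
// 1.0 * $3 + 0.1 * $15 = $4.50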
259
+ // src/prompt-manager.ts
260
+ var PromptManager = class extends Context3.Tag("PromptManager")() {
261
+ };
262
+ var PromptManagerLive = Layer2.succeed(
263
+ PromptManager,
264
+ PromptManager.of({
265
+ buildPrompt: (options) => Effect3.gen(function* () {
266
+ const {
267
+ systemPrompt,
268
+ messages,
269
+ reserveOutputTokens,
270
+ maxContextTokens,
271
+ truncationStrategy
272
+ } = options;
273
+ const budget = maxContextTokens - reserveOutputTokens;
274
+ const systemMessage = {
275
+ role: "system",
276
+ content: systemPrompt
277
+ };
278
+ const systemTokens = yield* estimateTokenCount([systemMessage]);
279
+ if (systemTokens >= budget) {
280
+ return [systemMessage];
281
+ }
282
+ const remainingBudget = budget - systemTokens;
283
+ const truncated = yield* applyTruncation(
284
+ messages,
285
+ remainingBudget,
286
+ truncationStrategy
287
+ );
288
+ return [systemMessage, ...truncated];
289
+ }),
290
+ fitsInContext: (messages, maxTokens) => Effect3.gen(function* () {
291
+ const count = yield* estimateTokenCount(messages);
292
+ return count <= maxTokens;
293
+ })
294
+ })
295
+ );
296
+ var applyTruncation = (messages, budget, strategy) => Effect3.gen(function* () {
297
+ const totalTokens = yield* estimateTokenCount(messages);
298
+ if (totalTokens <= budget) {
299
+ return messages;
300
+ }
301
+ switch (strategy) {
302
+ case "drop-oldest": {
303
+ const result = [];
304
+ let usedTokens = 0;
305
+ for (let i = messages.length - 1; i >= 0; i--) {
306
+ const msgTokens = yield* estimateTokenCount([messages[i]]);
307
+ if (usedTokens + msgTokens <= budget) {
308
+ result.unshift(messages[i]);
309
+ usedTokens += msgTokens;
310
+ } else {
311
+ break;
312
+ }
313
+ }
314
+ return result;
315
+ }
316
+ case "sliding-window": {
317
+ const result = [];
318
+ let usedTokens = 0;
319
+ for (let i = messages.length - 1; i >= 0; i--) {
320
+ const msgTokens = yield* estimateTokenCount([messages[i]]);
321
+ if (usedTokens + msgTokens <= budget) {
322
+ result.unshift(messages[i]);
323
+ usedTokens += msgTokens;
324
+ } else {
325
+ break;
326
+ }
327
+ }
328
+ return result;
329
+ }
330
+ case "summarize-middle":
331
+ case "importance-based": {
332
+ const result = [];
333
+ let usedTokens = 0;
334
+ if (messages.length > 0) {
335
+ const firstTokens = yield* estimateTokenCount([messages[0]]);
336
+ if (firstTokens <= budget) {
337
+ result.push(messages[0]);
338
+ usedTokens += firstTokens;
339
+ }
340
+ }
341
+ const tail = [];
342
+ for (let i = messages.length - 1; i >= 1; i--) {
343
+ const msgTokens = yield* estimateTokenCount([messages[i]]);
344
+ if (usedTokens + msgTokens <= budget) {
345
+ tail.unshift(messages[i]);
346
+ usedTokens += msgTokens;
347
+ } else {
348
+ break;
349
+ }
350
+ }
351
+ return [...result, ...tail];
352
+ }
353
+ }
354
+ });
355
+
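// Usage sketch: asking PromptManager to fit a conversation into a context
// window. The option names follow the destructuring in buildPrompt above;
// the conversation content is illustrative.
import { Effect } from "effect";

const conversation = [
  { role: "user", content: "Summarize the design doc." },
  { role: "assistant", content: "Which section should I focus on?" },
  { role: "user", content: "The retry policy." }
];

const fitted = Effect.gen(function* () {
  const pm = yield* PromptManager;
  return yield* pm.buildPrompt({
    systemPrompt: "You are a planning assistant.",
    messages: conversation,
    reserveOutputTokens: 1024,
    maxContextTokens: 200000,
    truncationStrategy: "drop-oldest"
  });
});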
356
+ // src/providers/anthropic.ts
357
+ import { Effect as Effect4, Layer as Layer3, Stream, Schema as Schema2 } from "effect";
358
+
359
+ // src/retry.ts
360
+ import { Schedule } from "effect";
361
+ var retryPolicy = Schedule.intersect(
362
+ Schedule.recurs(3),
363
+ Schedule.exponential("1 second", 2)
364
+ ).pipe(
365
+ Schedule.whileInput(
366
+ (error) => error._tag === "LLMRateLimitError" || error._tag === "LLMTimeoutError"
367
+ )
368
+ );
369
+
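// Usage sketch: retryPolicy allows at most 3 retries with exponential
// backoff and, via whileInput, only re-runs on rate-limit and timeout
// errors; a plain LLMError fails immediately.
import { Effect } from "effect";

const flaky = Effect.fail(
  new LLMRateLimitError({ message: "429", provider: "anthropic", retryAfterMs: 1000 })
);

const attempted = flaky.pipe(Effect.retry(retryPolicy));
// retried up to 3 times (1s, 2s, 4s backoff), then fails with the same error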
370
+ // src/providers/anthropic.ts
371
+ var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "system").map((m) => ({
372
+ role: m.role,
373
+ content: typeof m.content === "string" ? m.content : m.content.map(
374
+ (b) => b
375
+ )
376
+ }));
377
+ var toAnthropicTool = (tool) => ({
378
+ name: tool.name,
379
+ description: tool.description,
380
+ input_schema: {
381
+ type: "object",
382
+ ...tool.inputSchema
383
+ }
384
+ });
385
+ var toEffectError = (error, provider) => {
386
+ const err = error;
387
+ if (err.status === 429) {
388
+ const retryAfter = err.headers?.["retry-after"];
389
+ return new LLMRateLimitError({
390
+ message: err.message ?? "Rate limit exceeded",
391
+ provider,
392
+ retryAfterMs: retryAfter ? Number(retryAfter) * 1e3 : 6e4
393
+ });
394
+ }
395
+ return new LLMError({
396
+ message: err.message ?? String(error),
397
+ provider,
398
+ cause: error
399
+ });
400
+ };
401
+ var AnthropicProviderLive = Layer3.effect(
402
+ LLMService,
403
+ Effect4.gen(function* () {
404
+ const config = yield* LLMConfig;
405
+ const createClient = () => {
406
+ const Anthropic = __require("@anthropic-ai/sdk").default;
407
+ return new Anthropic({ apiKey: config.anthropicApiKey });
408
+ };
409
+ let _client = null;
410
+ const getClient = () => {
411
+ if (!_client) _client = createClient();
412
+ return _client;
413
+ };
414
+ return LLMService.of({
415
+ complete: (request) => Effect4.gen(function* () {
416
+ const client = getClient();
417
+ const model = request.model?.model ?? config.defaultModel;
418
+ const response = yield* Effect4.tryPromise({
419
+ try: () => client.messages.create({
420
+ model,
421
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
422
+ temperature: request.temperature ?? config.defaultTemperature,
423
+ system: request.systemPrompt,
424
+ messages: toAnthropicMessages(request.messages),
425
+ stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
426
+ tools: request.tools?.map(toAnthropicTool)
427
+ }),
428
+ catch: (error) => toEffectError(error, "anthropic")
429
+ });
430
+ return mapAnthropicResponse(response, model);
431
+ }).pipe(
432
+ Effect4.retry(retryPolicy),
433
+ Effect4.timeout("30 seconds"),
434
+ Effect4.catchTag(
435
+ "TimeoutException",
436
+ () => Effect4.fail(
437
+ new LLMTimeoutError({
438
+ message: "LLM request timed out",
439
+ provider: "anthropic",
440
+ timeoutMs: 3e4
441
+ })
442
+ )
443
+ )
444
+ ),
445
+ stream: (request) => Effect4.gen(function* () {
446
+ const client = getClient();
447
+ const model = request.model?.model ?? config.defaultModel;
448
+ return Stream.async((emit) => {
449
+ const stream = client.messages.stream({
450
+ model,
451
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
452
+ temperature: request.temperature ?? config.defaultTemperature,
453
+ system: request.systemPrompt,
454
+ messages: toAnthropicMessages(request.messages)
455
+ });
456
+ stream.on("text", (text) => {
457
+ emit.single({ type: "text_delta", text });
458
+ });
459
+ stream.on("finalMessage", (message) => {
460
+ const msg = message;
461
+ const content = msg.content.filter(
462
+ (b) => b.type === "text"
463
+ ).map((b) => b.text).join("");
464
+ emit.single({ type: "content_complete", content });
465
+ emit.single({
466
+ type: "usage",
467
+ usage: {
468
+ inputTokens: msg.usage.input_tokens,
469
+ outputTokens: msg.usage.output_tokens,
470
+ totalTokens: msg.usage.input_tokens + msg.usage.output_tokens,
471
+ estimatedCost: calculateCost(
472
+ msg.usage.input_tokens,
473
+ msg.usage.output_tokens,
474
+ model
475
+ )
476
+ }
477
+ });
478
+ emit.end();
479
+ });
480
+ stream.on("error", (error) => {
481
+ const err = error;
482
+ emit.fail(
483
+ new LLMError({
484
+ message: err.message ?? String(error),
485
+ provider: "anthropic",
486
+ cause: error
487
+ })
488
+ );
489
+ });
490
+ });
491
+ }),
492
+ completeStructured: (request) => Effect4.gen(function* () {
493
+ const schemaStr = JSON.stringify(
494
+ Schema2.encodedSchema(request.outputSchema),
495
+ null,
496
+ 2
497
+ );
498
+ const messagesWithFormat = [
499
+ ...request.messages,
500
+ {
501
+ role: "user",
502
+ content: `
503
+ Respond with ONLY valid JSON matching this schema:
504
+ ${schemaStr}
505
+
506
+ No markdown, no code fences, just raw JSON.`
507
+ }
508
+ ];
509
+ let lastError = null;
510
+ const maxRetries = request.maxParseRetries ?? 2;
511
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
512
+ const msgs = attempt === 0 ? messagesWithFormat : [
513
+ ...messagesWithFormat,
514
+ {
515
+ role: "assistant",
516
+ content: String(lastError)
517
+ },
518
+ {
519
+ role: "user",
520
+ content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
521
+ }
522
+ ];
523
+ const completeResult = yield* Effect4.tryPromise({
524
+ try: () => {
525
+ const client = getClient();
526
+ return client.messages.create({
527
+ model: request.model?.model ?? config.defaultModel,
528
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
529
+ temperature: request.temperature ?? config.defaultTemperature,
530
+ system: request.systemPrompt,
531
+ messages: toAnthropicMessages(msgs)
532
+ });
533
+ },
534
+ catch: (error) => toEffectError(error, "anthropic")
535
+ });
536
+ const response = mapAnthropicResponse(
537
+ completeResult,
538
+ request.model?.model ?? config.defaultModel
539
+ );
540
+ try {
541
+ const parsed = JSON.parse(response.content);
542
+ const decoded = Schema2.decodeUnknownEither(
543
+ request.outputSchema
544
+ )(parsed);
545
+ if (decoded._tag === "Right") {
546
+ return decoded.right;
547
+ }
548
+ lastError = decoded.left;
549
+ } catch (e) {
550
+ lastError = e;
551
+ }
552
+ }
553
+ return yield* Effect4.fail(
554
+ new LLMParseError({
555
+ message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
556
+ rawOutput: String(lastError),
557
+ expectedSchema: schemaStr
558
+ })
559
+ );
560
+ }),
561
+ embed: (texts, model) => Effect4.tryPromise({
562
+ try: async () => {
563
+ const embeddingModel = model ?? config.embeddingConfig.model;
564
+ const embProvider = config.embeddingConfig.provider;
565
+ if (embProvider === "openai") {
566
+ const { default: OpenAI } = await import("openai");
567
+ const openaiClient = new OpenAI({
568
+ apiKey: config.openaiApiKey
569
+ });
570
+ const batchSize = config.embeddingConfig.batchSize ?? 100;
571
+ const results = [];
572
+ for (let i = 0; i < texts.length; i += batchSize) {
573
+ const batch = texts.slice(i, i + batchSize);
574
+ const response = await openaiClient.embeddings.create({
575
+ model: embeddingModel,
576
+ input: [...batch],
577
+ dimensions: config.embeddingConfig.dimensions
578
+ });
579
+ results.push(
580
+ ...response.data.map(
581
+ (d) => d.embedding
582
+ )
583
+ );
584
+ }
585
+ return results;
586
+ }
587
+ const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
588
+ return Promise.all(
589
+ [...texts].map(async (text) => {
590
+ const res = await fetch(`${endpoint}/api/embed`, {
591
+ method: "POST",
592
+ headers: { "Content-Type": "application/json" },
593
+ body: JSON.stringify({
594
+ model: embeddingModel,
595
+ input: text
596
+ })
597
+ });
598
+ const data = await res.json();
599
+ return data.embeddings[0];
600
+ })
601
+ );
602
+ },
603
+ catch: (error) => new LLMError({
604
+ message: `Embedding failed: ${error}`,
605
+ provider: "anthropic",
606
+ cause: error
607
+ })
608
+ }),
609
+ countTokens: (messages) => Effect4.gen(function* () {
610
+ return yield* estimateTokenCount(messages);
611
+ }),
612
+ getModelConfig: () => Effect4.succeed({
613
+ provider: "anthropic",
614
+ model: config.defaultModel
615
+ })
616
+ });
617
+ })
618
+ );
619
+ var mapAnthropicResponse = (response, model) => {
620
+ const textContent = response.content.filter(
621
+ (b) => b.type === "text"
622
+ ).map((b) => b.text).join("");
623
+ const toolCalls = response.content.filter(
624
+ (b) => b.type === "tool_use"
625
+ ).map((b) => ({
626
+ id: b.id,
627
+ name: b.name,
628
+ input: b.input
629
+ }));
630
+ const stopReason = response.stop_reason === "end_turn" ? "end_turn" : response.stop_reason === "max_tokens" ? "max_tokens" : response.stop_reason === "stop_sequence" ? "stop_sequence" : response.stop_reason === "tool_use" ? "tool_use" : "end_turn";
631
+ return {
632
+ content: textContent,
633
+ stopReason,
634
+ usage: {
635
+ inputTokens: response.usage.input_tokens,
636
+ outputTokens: response.usage.output_tokens,
637
+ totalTokens: response.usage.input_tokens + response.usage.output_tokens,
638
+ estimatedCost: calculateCost(
639
+ response.usage.input_tokens,
640
+ response.usage.output_tokens,
641
+ model
642
+ )
643
+ },
644
+ model: response.model ?? model,
645
+ toolCalls: toolCalls.length > 0 ? toolCalls : void 0
646
+ };
647
+ };
648
+
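// Usage sketch: consuming the streaming interface. stream() is an Effect
// that yields a Stream of events tagged "text_delta", "content_complete",
// and "usage", matching the emits above; here it is drained with
// Stream.runForEach.
import { Effect, Stream } from "effect";

const printCompletion = Effect.gen(function* () {
  const llm = yield* LLMService;
  const events = yield* llm.stream({
    systemPrompt: "Answer briefly.",
    messages: [{ role: "user", content: "Name one prime number." }]
  });
  yield* Stream.runForEach(events, (event) =>
    Effect.sync(() => {
      if (event.type === "text_delta") process.stdout.write(event.text);
    })
  );
});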
649
+ // src/providers/openai.ts
650
+ import { Effect as Effect5, Layer as Layer4, Stream as Stream2, Schema as Schema3 } from "effect";
651
+ var toOpenAIMessages = (messages) => messages.map((m) => ({
652
+ role: m.role,
653
+ content: typeof m.content === "string" ? m.content : m.content.filter(
654
+ (b) => b.type === "text"
655
+ ).map((b) => b.text).join("")
656
+ }));
657
+ var toEffectError2 = (error, provider) => {
658
+ const err = error;
659
+ if (err.status === 429) {
660
+ return new LLMRateLimitError({
661
+ message: err.message ?? "Rate limit exceeded",
662
+ provider,
663
+ retryAfterMs: 6e4
664
+ });
665
+ }
666
+ return new LLMError({
667
+ message: err.message ?? String(error),
668
+ provider,
669
+ cause: error
670
+ });
671
+ };
672
+ var OpenAIProviderLive = Layer4.effect(
673
+ LLMService,
674
+ Effect5.gen(function* () {
675
+ const config = yield* LLMConfig;
676
+ const createClient = () => {
677
+ const OpenAI = __require("openai").default;
678
+ return new OpenAI({ apiKey: config.openaiApiKey });
679
+ };
680
+ let _client = null;
681
+ const getClient = () => {
682
+ if (!_client) _client = createClient();
683
+ return _client;
684
+ };
685
+ const defaultModel = config.defaultModel.startsWith("claude") ? "gpt-4o" : config.defaultModel;
686
+ return LLMService.of({
687
+ complete: (request) => Effect5.gen(function* () {
688
+ const client = getClient();
689
+ const model = request.model?.model ?? defaultModel;
690
+ const response = yield* Effect5.tryPromise({
691
+ try: () => client.chat.completions.create({
692
+ model,
693
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
694
+ temperature: request.temperature ?? config.defaultTemperature,
695
+ messages: toOpenAIMessages(request.messages),
696
+ stop: request.stopSequences ? [...request.stopSequences] : void 0
697
+ }),
698
+ catch: (error) => toEffectError2(error, "openai")
699
+ });
700
+ return mapOpenAIResponse(response, model);
701
+ }).pipe(
702
+ Effect5.retry(retryPolicy),
703
+ Effect5.timeout("30 seconds"),
704
+ Effect5.catchTag(
705
+ "TimeoutException",
706
+ () => Effect5.fail(
707
+ new LLMTimeoutError({
708
+ message: "LLM request timed out",
709
+ provider: "openai",
710
+ timeoutMs: 3e4
711
+ })
712
+ )
713
+ )
714
+ ),
715
+ stream: (request) => Effect5.gen(function* () {
716
+ const client = getClient();
717
+ const model = request.model?.model ?? defaultModel;
718
+ return Stream2.async((emit) => {
719
+ const doStream = async () => {
720
+ try {
721
+ const stream = await client.chat.completions.create({
722
+ model,
723
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
724
+ temperature: request.temperature ?? config.defaultTemperature,
725
+ messages: toOpenAIMessages(request.messages),
726
+ stream: true
727
+ });
728
+ let fullContent = "";
729
+ for await (const chunk of stream) {
730
+ const delta = chunk.choices[0]?.delta?.content;
731
+ if (delta) {
732
+ fullContent += delta;
733
+ emit.single({ type: "text_delta", text: delta });
734
+ }
735
+ if (chunk.choices[0]?.finish_reason) {
736
+ emit.single({
737
+ type: "content_complete",
738
+ content: fullContent
739
+ });
740
+ const inputTokens = chunk.usage?.prompt_tokens ?? 0;
741
+ const outputTokens = chunk.usage?.completion_tokens ?? 0;
742
+ emit.single({
743
+ type: "usage",
744
+ usage: {
745
+ inputTokens,
746
+ outputTokens,
747
+ totalTokens: inputTokens + outputTokens,
748
+ estimatedCost: calculateCost(
749
+ inputTokens,
750
+ outputTokens,
751
+ model
752
+ )
753
+ }
754
+ });
755
+ emit.end();
756
+ }
757
+ }
758
+ } catch (error) {
759
+ const err = error;
760
+ emit.fail(
761
+ new LLMError({
762
+ message: err.message ?? String(error),
763
+ provider: "openai",
764
+ cause: error
765
+ })
766
+ );
767
+ }
768
+ };
769
+ void doStream();
770
+ });
771
+ }),
772
+ completeStructured: (request) => Effect5.gen(function* () {
773
+ const schemaStr = JSON.stringify(
774
+ Schema3.encodedSchema(request.outputSchema),
775
+ null,
776
+ 2
777
+ );
778
+ const messagesWithFormat = [
779
+ ...request.messages,
780
+ {
781
+ role: "user",
782
+ content: `
783
+ Respond with ONLY valid JSON matching this schema:
784
+ ${schemaStr}
785
+
786
+ No markdown, no code fences, just raw JSON.`
787
+ }
788
+ ];
789
+ let lastError = null;
790
+ const maxRetries = request.maxParseRetries ?? 2;
791
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
792
+ const msgs = attempt === 0 ? messagesWithFormat : [
793
+ ...messagesWithFormat,
794
+ {
795
+ role: "assistant",
796
+ content: String(lastError)
797
+ },
798
+ {
799
+ role: "user",
800
+ content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
801
+ }
802
+ ];
803
+ const client = getClient();
804
+ const completeResult = yield* Effect5.tryPromise({
805
+ try: () => client.chat.completions.create({
806
+ model: request.model?.model ?? defaultModel,
807
+ max_tokens: request.maxTokens ?? config.defaultMaxTokens,
808
+ temperature: request.temperature ?? config.defaultTemperature,
809
+ messages: toOpenAIMessages(msgs)
810
+ }),
811
+ catch: (error) => toEffectError2(error, "openai")
812
+ });
813
+ const response = mapOpenAIResponse(
814
+ completeResult,
815
+ request.model?.model ?? defaultModel
816
+ );
817
+ try {
818
+ const parsed = JSON.parse(response.content);
819
+ const decoded = Schema3.decodeUnknownEither(
820
+ request.outputSchema
821
+ )(parsed);
822
+ if (decoded._tag === "Right") {
823
+ return decoded.right;
824
+ }
825
+ lastError = decoded.left;
826
+ } catch (e) {
827
+ lastError = e;
828
+ }
829
+ }
830
+ return yield* Effect5.fail(
831
+ new LLMParseError({
832
+ message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
833
+ rawOutput: String(lastError),
834
+ expectedSchema: schemaStr
835
+ })
836
+ );
837
+ }),
838
+ embed: (texts, model) => Effect5.tryPromise({
839
+ try: async () => {
840
+ const client = getClient();
841
+ const embeddingModel = model ?? config.embeddingConfig.model;
842
+ const batchSize = config.embeddingConfig.batchSize ?? 100;
843
+ const results = [];
844
+ for (let i = 0; i < texts.length; i += batchSize) {
845
+ const batch = texts.slice(i, i + batchSize);
846
+ const response = await client.embeddings.create({
847
+ model: embeddingModel,
848
+ input: [...batch],
849
+ dimensions: config.embeddingConfig.dimensions
850
+ });
851
+ results.push(
852
+ ...response.data.map(
853
+ (d) => d.embedding
854
+ )
855
+ );
856
+ }
857
+ return results;
858
+ },
859
+ catch: (error) => new LLMError({
860
+ message: `Embedding failed: ${error}`,
861
+ provider: "openai",
862
+ cause: error
863
+ })
864
+ }),
865
+ countTokens: (messages) => Effect5.gen(function* () {
866
+ return yield* estimateTokenCount(messages);
867
+ }),
868
+ getModelConfig: () => Effect5.succeed({
869
+ provider: "openai",
870
+ model: defaultModel
871
+ })
872
+ });
873
+ })
874
+ );
875
+ var mapOpenAIResponse = (response, model) => {
876
+ const content = response.choices[0]?.message?.content ?? "";
877
+ const stopReason = response.choices[0]?.finish_reason === "stop" ? "end_turn" : response.choices[0]?.finish_reason === "length" ? "max_tokens" : "end_turn";
878
+ return {
879
+ content,
880
+ stopReason,
881
+ usage: {
882
+ inputTokens: response.usage?.prompt_tokens ?? 0,
883
+ outputTokens: response.usage?.completion_tokens ?? 0,
884
+ totalTokens: response.usage?.total_tokens ?? 0,
885
+ estimatedCost: calculateCost(
886
+ response.usage?.prompt_tokens ?? 0,
887
+ response.usage?.completion_tokens ?? 0,
888
+ model
889
+ )
890
+ },
891
+ model: response.model ?? model
892
+ };
893
+ };
894
+
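// Usage sketch: batch embeddings through the same service interface. The
// OpenAI path above chunks inputs by embeddingConfig.batchSize (default 100)
// and returns one vector per input text.
import { Effect } from "effect";

const embedDocs = Effect.gen(function* () {
  const llm = yield* LLMService;
  const vectors = yield* llm.embed(["first document", "second document"]);
  return vectors.length; // 2
});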
895
+ // src/providers/local.ts
896
+ import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
897
+ var toOllamaMessages = (messages) => messages.map((m) => ({
898
+ role: m.role,
899
+ content: typeof m.content === "string" ? m.content : m.content.filter(
900
+ (b) => b.type === "text"
901
+ ).map((b) => b.text).join("")
902
+ }));
903
+ var LocalProviderLive = Layer5.effect(
904
+ LLMService,
905
+ Effect6.gen(function* () {
906
+ const config = yield* LLMConfig;
907
+ const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
908
+ const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? "llama3" : config.defaultModel;
909
+ return LLMService.of({
910
+ complete: (request) => Effect6.gen(function* () {
911
+ const model = request.model?.model ?? defaultModel;
912
+ const response = yield* Effect6.tryPromise({
913
+ try: async () => {
914
+ const res = await fetch(`${endpoint}/api/chat`, {
915
+ method: "POST",
916
+ headers: { "Content-Type": "application/json" },
917
+ body: JSON.stringify({
918
+ model,
919
+ messages: toOllamaMessages(request.messages),
920
+ stream: false,
921
+ options: {
922
+ temperature: request.temperature ?? config.defaultTemperature,
923
+ num_predict: request.maxTokens ?? config.defaultMaxTokens,
924
+ stop: request.stopSequences ? [...request.stopSequences] : void 0
925
+ }
926
+ })
927
+ });
928
+ if (!res.ok) {
929
+ throw new Error(
930
+ `Ollama request failed: ${res.status} ${res.statusText}`
931
+ );
932
+ }
933
+ return await res.json();
934
+ },
935
+ catch: (error) => new LLMError({
936
+ message: `Ollama request failed: ${error}`,
937
+ provider: "ollama",
938
+ cause: error
939
+ })
940
+ });
941
+ const content = response.message?.content ?? "";
942
+ const inputTokens = response.prompt_eval_count ?? 0;
943
+ const outputTokens = response.eval_count ?? 0;
944
+ return {
945
+ content,
946
+ stopReason: response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
947
+ usage: {
948
+ inputTokens,
949
+ outputTokens,
950
+ totalTokens: inputTokens + outputTokens,
951
+ estimatedCost: 0
952
+ // Local models are free
953
+ },
954
+ model: response.model ?? model
955
+ };
956
+ }).pipe(
957
+ Effect6.retry(retryPolicy),
958
+ Effect6.timeout("60 seconds"),
959
+ Effect6.catchTag(
960
+ "TimeoutException",
961
+ () => Effect6.fail(
962
+ new LLMTimeoutError({
963
+ message: "Local LLM request timed out",
964
+ provider: "ollama",
965
+ timeoutMs: 6e4
966
+ })
967
+ )
968
+ )
969
+ ),
970
+ stream: (request) => Effect6.gen(function* () {
971
+ const model = request.model?.model ?? defaultModel;
972
+ return Stream3.async((emit) => {
973
+ const doStream = async () => {
974
+ try {
975
+ const res = await fetch(`${endpoint}/api/chat`, {
976
+ method: "POST",
977
+ headers: { "Content-Type": "application/json" },
978
+ body: JSON.stringify({
979
+ model,
980
+ messages: toOllamaMessages(request.messages),
981
+ stream: true,
982
+ options: {
983
+ temperature: request.temperature ?? config.defaultTemperature,
984
+ num_predict: request.maxTokens ?? config.defaultMaxTokens
985
+ }
986
+ })
987
+ });
988
+ if (!res.ok || !res.body) {
989
+ throw new Error(`Ollama stream failed: ${res.status}`);
990
+ }
991
+ const reader = res.body.getReader();
992
+ const decoder = new TextDecoder();
993
+ let fullContent = "";
994
+ while (true) {
995
+ const { done, value } = await reader.read();
996
+ if (done) break;
997
+ const lines = decoder.decode(value, { stream: true }).split("\n").filter(Boolean);
998
+ for (const line of lines) {
999
+ const parsed = JSON.parse(line);
1000
+ if (parsed.message?.content) {
1001
+ fullContent += parsed.message.content;
1002
+ emit.single({
1003
+ type: "text_delta",
1004
+ text: parsed.message.content
1005
+ });
1006
+ }
1007
+ if (parsed.done) {
1008
+ emit.single({
1009
+ type: "content_complete",
1010
+ content: fullContent
1011
+ });
1012
+ emit.single({
1013
+ type: "usage",
1014
+ usage: {
1015
+ inputTokens: parsed.prompt_eval_count ?? 0,
1016
+ outputTokens: parsed.eval_count ?? 0,
1017
+ totalTokens: (parsed.prompt_eval_count ?? 0) + (parsed.eval_count ?? 0),
1018
+ estimatedCost: 0
1019
+ }
1020
+ });
1021
+ emit.end();
1022
+ }
1023
+ }
1024
+ }
1025
+ } catch (error) {
1026
+ const err = error;
1027
+ emit.fail(
1028
+ new LLMError({
1029
+ message: err.message ?? String(error),
1030
+ provider: "ollama",
1031
+ cause: error
1032
+ })
1033
+ );
1034
+ }
1035
+ };
1036
+ void doStream();
1037
+ });
1038
+ }),
1039
+ completeStructured: (request) => Effect6.gen(function* () {
1040
+ const schemaStr = JSON.stringify(
1041
+ Schema4.encodedSchema(request.outputSchema),
1042
+ null,
1043
+ 2
1044
+ );
1045
+ const messagesWithFormat = [
1046
+ ...request.messages,
1047
+ {
1048
+ role: "user",
1049
+ content: `
1050
+ Respond with ONLY valid JSON matching this schema:
1051
+ ${schemaStr}
1052
+
1053
+ No markdown, no code fences, just raw JSON.`
1054
+ }
1055
+ ];
1056
+ let lastError = null;
1057
+ const maxRetries = request.maxParseRetries ?? 2;
1058
+ const llm = {
1059
+ complete: (req) => Effect6.gen(function* () {
1060
+ const model = req.model?.model ?? defaultModel;
1061
+ const res = yield* Effect6.tryPromise({
1062
+ try: async () => {
1063
+ const resp = await fetch(`${endpoint}/api/chat`, {
1064
+ method: "POST",
1065
+ headers: { "Content-Type": "application/json" },
1066
+ body: JSON.stringify({
1067
+ model,
1068
+ messages: toOllamaMessages(req.messages),
1069
+ stream: false,
1070
+ options: {
1071
+ temperature: req.temperature ?? config.defaultTemperature,
1072
+ num_predict: req.maxTokens ?? config.defaultMaxTokens
1073
+ }
1074
+ })
1075
+ });
1076
+ return await resp.json();
1077
+ },
1078
+ catch: (error) => new LLMError({
1079
+ message: `Ollama request failed: ${error}`,
1080
+ provider: "ollama",
1081
+ cause: error
1082
+ })
1083
+ });
1084
+ const content = res.message?.content ?? "";
1085
+ const inputTokens = res.prompt_eval_count ?? 0;
1086
+ const outputTokens = res.eval_count ?? 0;
1087
+ return {
1088
+ content,
1089
+ stopReason: "end_turn",
1090
+ usage: {
1091
+ inputTokens,
1092
+ outputTokens,
1093
+ totalTokens: inputTokens + outputTokens,
1094
+ estimatedCost: 0
1095
+ },
1096
+ model: res.model ?? model
1097
+ };
1098
+ })
1099
+ };
1100
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1101
+ const msgs = attempt === 0 ? messagesWithFormat : [
1102
+ ...messagesWithFormat,
1103
+ {
1104
+ role: "assistant",
1105
+ content: String(lastError)
1106
+ },
1107
+ {
1108
+ role: "user",
1109
+ content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
1110
+ }
1111
+ ];
1112
+ const response = yield* llm.complete({
1113
+ ...request,
1114
+ messages: msgs
1115
+ });
1116
+ try {
1117
+ const parsed = JSON.parse(response.content);
1118
+ const decoded = Schema4.decodeUnknownEither(
1119
+ request.outputSchema
1120
+ )(parsed);
1121
+ if (decoded._tag === "Right") {
1122
+ return decoded.right;
1123
+ }
1124
+ lastError = decoded.left;
1125
+ } catch (e) {
1126
+ lastError = e;
1127
+ }
1128
+ }
1129
+ return yield* Effect6.fail(
1130
+ new LLMParseError({
1131
+ message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
1132
+ rawOutput: String(lastError),
1133
+ expectedSchema: schemaStr
1134
+ })
1135
+ );
1136
+ }),
1137
+ embed: (texts, model) => Effect6.tryPromise({
1138
+ try: async () => {
1139
+ const embeddingModel = model ?? config.embeddingConfig.model ?? "nomic-embed-text";
1140
+ return Promise.all(
1141
+ [...texts].map(async (text) => {
1142
+ const res = await fetch(`${endpoint}/api/embed`, {
1143
+ method: "POST",
1144
+ headers: { "Content-Type": "application/json" },
1145
+ body: JSON.stringify({
1146
+ model: embeddingModel,
1147
+ input: text
1148
+ })
1149
+ });
1150
+ const data = await res.json();
1151
+ return data.embeddings[0];
1152
+ })
1153
+ );
1154
+ },
1155
+ catch: (error) => new LLMError({
1156
+ message: `Embedding failed: ${error}`,
1157
+ provider: "ollama",
1158
+ cause: error
1159
+ })
1160
+ }),
1161
+ countTokens: (messages) => Effect6.gen(function* () {
1162
+ return yield* estimateTokenCount(messages);
1163
+ }),
1164
+ getModelConfig: () => Effect6.succeed({
1165
+ provider: "ollama",
1166
+ model: defaultModel
1167
+ })
1168
+ });
1169
+ })
1170
+ );
1171
+
1172
+ // src/providers/gemini.ts
1173
+ import { Effect as Effect7, Layer as Layer6, Stream as Stream4, Schema as Schema5 } from "effect";
1174
+ var toGeminiContents = (messages) => {
1175
+ const result = [];
1176
+ for (const msg of messages) {
1177
+ if (msg.role === "system") continue;
1178
+ const role = msg.role === "assistant" ? "model" : "user";
1179
+ if (typeof msg.content === "string") {
1180
+ result.push({ role, parts: [{ text: msg.content }] });
1181
+ } else {
1182
+ const parts = [];
1183
+ for (const block of msg.content) {
1184
+ if (block.type === "text") {
1185
+ parts.push({ text: block.text });
1186
+ } else if (block.type === "tool_use") {
1187
+ parts.push({
1188
+ functionCall: { name: block.name, args: block.input }
1189
+ });
1190
+ } else if (block.type === "tool_result") {
1191
+ parts.push({
1192
+ functionResponse: {
1193
+ name: "tool",
1194
+ response: { content: block.content }
1195
+ }
1196
+ });
1197
+ }
1198
+ }
1199
+ if (parts.length > 0) {
1200
+ result.push({ role, parts });
1201
+ }
1202
+ }
1203
+ }
1204
+ return result;
1205
+ };
1206
+ var extractSystemPrompt = (messages) => {
1207
+ const sys = messages.find((m) => m.role === "system");
1208
+ if (!sys) return void 0;
1209
+ return typeof sys.content === "string" ? sys.content : void 0;
1210
+ };
1211
+ var toGeminiTools = (tools) => tools.length === 0 ? void 0 : [
1212
+ {
1213
+ functionDeclarations: tools.map((t) => ({
1214
+ name: t.name,
1215
+ description: t.description,
1216
+ parameters: { type: "object", ...t.inputSchema }
1217
+ }))
1218
+ }
1219
+ ];
1220
+ var toEffectError3 = (error) => {
1221
+ const err = error;
1222
+ if (err.status === 429 || err.code === 429) {
1223
+ return new LLMRateLimitError({
1224
+ message: err.message ?? "Rate limit exceeded",
1225
+ provider: "gemini",
1226
+ retryAfterMs: 6e4
1227
+ });
1228
+ }
1229
+ return new LLMError({
1230
+ message: err.message ?? String(error),
1231
+ provider: "gemini",
1232
+ cause: error
1233
+ });
1234
+ };
1235
+ var mapGeminiResponse = (response, model) => {
1236
+ const toolCalls = response.functionCalls?.map((fc, i) => ({
1237
+ id: `call_${i}`,
1238
+ name: fc.name,
1239
+ input: fc.args
1240
+ }));
1241
+ const inputTokens = response.usageMetadata?.promptTokenCount ?? 0;
1242
+ const outputTokens = response.usageMetadata?.candidatesTokenCount ?? 0;
1243
+ return {
1244
+ content: response.text ?? "",
1245
+ stopReason: toolCalls?.length ? "tool_use" : "end_turn",
1246
+ usage: {
1247
+ inputTokens,
1248
+ outputTokens,
1249
+ totalTokens: inputTokens + outputTokens,
1250
+ estimatedCost: calculateCost(inputTokens, outputTokens, model)
1251
+ },
1252
+ model,
1253
+ toolCalls: toolCalls?.length ? toolCalls : void 0
1254
+ };
1255
+ };
1256
+ var GeminiProviderLive = Layer6.effect(
1257
+ LLMService,
1258
+ Effect7.gen(function* () {
1259
+ const config = yield* LLMConfig;
1260
+ let _clientPromise = null;
1261
+ const getClient = () => {
1262
+ if (!_clientPromise) {
1263
+ _clientPromise = import("@google/genai").then(({ GoogleGenAI }) => new GoogleGenAI({ apiKey: config.googleApiKey }));
1264
+ }
1265
+ return _clientPromise;
1266
+ };
1267
+ const buildGeminiConfig = (opts) => {
1268
+ const cfg = {
1269
+ maxOutputTokens: opts.maxTokens ?? config.defaultMaxTokens,
1270
+ temperature: opts.temperature ?? config.defaultTemperature
1271
+ };
1272
+ const sys = opts.systemPrompt;
1273
+ if (sys) cfg.systemInstruction = sys;
1274
+ if (opts.stopSequences?.length) cfg.stopSequences = [...opts.stopSequences];
1275
+ if (opts.tools?.length) {
1276
+ cfg.tools = toGeminiTools([...opts.tools]);
1277
+ }
1278
+ return cfg;
1279
+ };
1280
+ return LLMService.of({
1281
+ complete: (request) => Effect7.gen(function* () {
1282
+ const client = yield* Effect7.promise(() => getClient());
1283
+ const model = request.model?.model ?? config.defaultModel;
1284
+ const contents = toGeminiContents(request.messages);
1285
+ const systemPrompt = extractSystemPrompt(request.messages) ?? request.systemPrompt;
1286
+ const response = yield* Effect7.tryPromise({
1287
+ try: () => client.models.generateContent({
1288
+ model,
1289
+ contents,
1290
+ config: buildGeminiConfig({
1291
+ maxTokens: request.maxTokens,
1292
+ temperature: request.temperature,
1293
+ systemPrompt,
1294
+ stopSequences: request.stopSequences,
1295
+ tools: request.tools
1296
+ })
1297
+ }),
1298
+ catch: toEffectError3
1299
+ });
1300
+ return mapGeminiResponse(response, model);
1301
+ }).pipe(
1302
+ Effect7.retry(retryPolicy),
1303
+ Effect7.timeout("30 seconds"),
1304
+ Effect7.catchTag(
1305
+ "TimeoutException",
1306
+ () => Effect7.fail(
1307
+ new LLMTimeoutError({
1308
+ message: "LLM request timed out",
1309
+ provider: "gemini",
1310
+ timeoutMs: 3e4
1311
+ })
1312
+ )
1313
+ )
1314
+ ),
1315
+ stream: (request) => Effect7.gen(function* () {
1316
+ const model = request.model?.model ?? config.defaultModel;
1317
+ const contents = toGeminiContents(request.messages);
1318
+ const systemPrompt = extractSystemPrompt(request.messages) ?? request.systemPrompt;
1319
+ return Stream4.async((emit) => {
1320
+ void (async () => {
1321
+ try {
1322
+ const client = await getClient();
1323
+ const stream = await client.models.generateContentStream({
1324
+ model,
1325
+ contents,
1326
+ config: buildGeminiConfig({
1327
+ maxTokens: request.maxTokens,
1328
+ temperature: request.temperature,
1329
+ systemPrompt
1330
+ })
1331
+ });
1332
+ let fullContent = "";
1333
+ let inputTokens = 0;
1334
+ let outputTokens = 0;
1335
+ for await (const chunk of stream) {
1336
+ if (chunk.text) {
1337
+ emit.single({ type: "text_delta", text: chunk.text });
1338
+ fullContent += chunk.text;
1339
+ }
1340
+ if (chunk.usageMetadata) {
1341
+ inputTokens = chunk.usageMetadata.promptTokenCount ?? 0;
1342
+ outputTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
1343
+ }
1344
+ }
1345
+ emit.single({ type: "content_complete", content: fullContent });
1346
+ emit.single({
1347
+ type: "usage",
1348
+ usage: {
1349
+ inputTokens,
1350
+ outputTokens,
1351
+ totalTokens: inputTokens + outputTokens,
1352
+ estimatedCost: calculateCost(inputTokens, outputTokens, model)
1353
+ }
1354
+ });
1355
+ emit.end();
1356
+ } catch (error) {
1357
+ const err = error;
1358
+ emit.fail(
1359
+ new LLMError({
1360
+ message: err.message ?? String(error),
1361
+ provider: "gemini",
1362
+ cause: error
1363
+ })
1364
+ );
1365
+ }
1366
+ })();
1367
+ });
1368
+ }),
1369
+ completeStructured: (request) => Effect7.gen(function* () {
1370
+ const schemaStr = JSON.stringify(
1371
+ Schema5.encodedSchema(request.outputSchema),
1372
+ null,
1373
+ 2
1374
+ );
1375
+ const messagesWithFormat = [
1376
+ ...request.messages,
1377
+ {
1378
+ role: "user",
1379
+ content: `
1380
+ Respond with ONLY valid JSON matching this schema:
1381
+ ${schemaStr}
1382
+
1383
+ No markdown, no code fences, just raw JSON.`
1384
+ }
1385
+ ];
1386
+ let lastError = null;
1387
+ const maxRetries = request.maxParseRetries ?? 2;
1388
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1389
+ const msgs = attempt === 0 ? messagesWithFormat : [
1390
+ ...messagesWithFormat,
1391
+ {
1392
+ role: "assistant",
1393
+ content: String(lastError)
1394
+ },
1395
+ {
1396
+ role: "user",
1397
+ content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
1398
+ }
1399
+ ];
1400
+ const client = yield* Effect7.promise(() => getClient());
1401
+ const model = request.model?.model ?? config.defaultModel;
1402
+ const response = yield* Effect7.tryPromise({
1403
+ try: () => client.models.generateContent({
1404
+ model,
1405
+ contents: toGeminiContents(msgs),
1406
+ config: buildGeminiConfig({
1407
+ maxTokens: request.maxTokens,
1408
+ temperature: request.temperature,
1409
+ systemPrompt: request.systemPrompt
1410
+ })
1411
+ }),
1412
+ catch: toEffectError3
1413
+ });
1414
+ const mapped = mapGeminiResponse(response, model);
1415
+ try {
1416
+ const parsed = JSON.parse(mapped.content);
1417
+ const decoded = Schema5.decodeUnknownEither(
1418
+ request.outputSchema
1419
+ )(parsed);
1420
+ if (decoded._tag === "Right") {
1421
+ return decoded.right;
1422
+ }
1423
+ lastError = decoded.left;
1424
+ } catch (e) {
1425
+ lastError = e;
1426
+ }
1427
+ }
1428
+ return yield* Effect7.fail(
1429
+ new LLMParseError({
1430
+ message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
1431
+ rawOutput: String(lastError),
1432
+ expectedSchema: schemaStr
1433
+ })
1434
+ );
1435
+ }),
1436
+ embed: (texts, model) => Effect7.tryPromise({
1437
+ try: async () => {
1438
+ const client = await getClient();
1439
+ const embeddingModel = model ?? "gemini-embedding-001";
1440
+ const result = await client.models.embedContent({
1441
+ model: embeddingModel,
1442
+ contents: [...texts],
1443
+ config: {
1444
+ outputDimensionality: config.embeddingConfig.dimensions
1445
+ }
1446
+ });
1447
+ return result.embeddings.map((e) => e.values);
1448
+ },
1449
+ catch: (error) => new LLMError({
1450
+ message: `Embedding failed: ${error}`,
1451
+ provider: "gemini",
1452
+ cause: error
1453
+ })
1454
+ }),
1455
+ countTokens: (messages) => Effect7.gen(function* () {
1456
+ return yield* estimateTokenCount(messages);
1457
+ }),
1458
+ getModelConfig: () => Effect7.succeed({
1459
+ provider: "gemini",
1460
+ model: config.defaultModel
1461
+ })
1462
+ });
1463
+ })
1464
+ );
1465
+
1466
+ // src/testing.ts
1467
+ import { Effect as Effect8, Layer as Layer7, Stream as Stream5, Schema as Schema6 } from "effect";
1468
+ var TestLLMService = (responses) => ({
1469
+ complete: (request) => Effect8.gen(function* () {
1470
+ const lastMessage = request.messages[request.messages.length - 1];
1471
+ const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
1472
+ const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
1473
+ const searchText = `${content} ${systemPrompt}`;
1474
+ for (const [pattern, response] of Object.entries(responses)) {
1475
+ if (pattern.length > 0 && searchText.includes(pattern)) {
1476
+ return {
1477
+ content: response,
1478
+ stopReason: "end_turn",
1479
+ usage: {
1480
+ inputTokens: Math.ceil(content.length / 4),
1481
+ outputTokens: Math.ceil(response.length / 4),
1482
+ totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
1483
+ estimatedCost: 0
1484
+ },
1485
+ model: "test-model"
1486
+ };
1487
+ }
1488
+ }
1489
+ return {
1490
+ content: "Test response",
1491
+ stopReason: "end_turn",
1492
+ usage: {
1493
+ inputTokens: 0,
1494
+ outputTokens: 0,
1495
+ totalTokens: 0,
1496
+ estimatedCost: 0
1497
+ },
1498
+ model: "test-model"
1499
+ };
1500
+ }),
1501
+ stream: (_request) => Effect8.succeed(
1502
+ Stream5.make(
1503
+ { type: "text_delta", text: "Test " },
1504
+ { type: "text_delta", text: "response" },
1505
+ {
1506
+ type: "content_complete",
1507
+ content: "Test response"
1508
+ },
1509
+ {
1510
+ type: "usage",
1511
+ usage: {
1512
+ inputTokens: 0,
1513
+ outputTokens: 0,
1514
+ totalTokens: 0,
1515
+ estimatedCost: 0
1516
+ }
1517
+ }
1518
+ )
1519
+ ),
1520
+ completeStructured: (request) => Effect8.gen(function* () {
1521
+ const lastMessage = request.messages[request.messages.length - 1];
1522
+ const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
1523
+ let responseContent = "Test response";
1524
+ for (const [pattern, response] of Object.entries(responses)) {
1525
+ if (content.includes(pattern)) {
1526
+ responseContent = response;
1527
+ break;
1528
+ }
1529
+ }
1530
+ const parsed = JSON.parse(responseContent);
1531
+ return Schema6.decodeUnknownSync(request.outputSchema)(parsed);
1532
+ }),
1533
+ embed: (texts) => Effect8.succeed(
1534
+ texts.map(() => new Array(768).fill(0).map(() => Math.random()))
1535
+ ),
1536
+ countTokens: (messages) => Effect8.succeed(
1537
+ messages.reduce(
1538
+ (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
1539
+ 0
1540
+ )
1541
+ ),
1542
+ getModelConfig: () => Effect8.succeed({
1543
+ provider: "anthropic",
1544
+ model: "test-model"
1545
+ })
1546
+ });
1547
+ var TestLLMServiceLayer = (responses = {}) => Layer7.succeed(LLMService, LLMService.of(TestLLMService(responses)));
1548
+
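// Usage sketch: in tests, TestLLMServiceLayer maps substring patterns to
// canned responses; prompts that match no pattern fall back to
// "Test response". The pattern and reply below are illustrative.
import { Effect } from "effect";

const testRun = Effect.gen(function* () {
  const llm = yield* LLMService;
  const res = yield* llm.complete({
    messages: [{ role: "user", content: "What is the plan?" }]
  });
  return res.content; // "Here is the plan."
}).pipe(Effect.provide(TestLLMServiceLayer({ plan: "Here is the plan." })));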
1549
+ // src/structured-output.ts
1550
+ import { Schema as Schema7 } from "effect";
1551
+ var ReActActionSchema = Schema7.Struct({
1552
+ thought: Schema7.String,
1553
+ action: Schema7.optional(
1554
+ Schema7.Struct({
1555
+ tool: Schema7.String,
1556
+ input: Schema7.Unknown
1557
+ })
1558
+ ),
1559
+ finalAnswer: Schema7.optional(Schema7.String),
1560
+ isComplete: Schema7.Boolean
1561
+ });
1562
+ var PlanSchema = Schema7.Struct({
1563
+ goal: Schema7.String,
1564
+ steps: Schema7.Array(
1565
+ Schema7.Struct({
1566
+ id: Schema7.Number,
1567
+ description: Schema7.String,
1568
+ tool: Schema7.optional(Schema7.String),
1569
+ dependsOn: Schema7.optional(Schema7.Array(Schema7.Number)),
1570
+ estimatedDuration: Schema7.optional(Schema7.String)
1571
+ })
1572
+ )
1573
+ });
1574
+ var ReflectionSchema = Schema7.Struct({
1575
+ taskAccomplished: Schema7.Boolean,
1576
+ confidence: Schema7.Number,
1577
+ strengths: Schema7.Array(Schema7.String),
1578
+ weaknesses: Schema7.Array(Schema7.String),
1579
+ needsRefinement: Schema7.Boolean,
1580
+ refinementSuggestions: Schema7.optional(Schema7.Array(Schema7.String))
1581
+ });
1582
+ var StrategySelectionSchema = Schema7.Struct({
1583
+ selectedStrategy: Schema7.String,
1584
+ reasoning: Schema7.String,
1585
+ confidence: Schema7.Number,
1586
+ alternativeStrategies: Schema7.Array(
1587
+ Schema7.Struct({
1588
+ strategy: Schema7.String,
1589
+ whyNot: Schema7.String
1590
+ })
1591
+ )
1592
+ });
1593
+ var ThoughtEvaluationSchema = Schema7.Struct({
1594
+ score: Schema7.Number,
1595
+ reasoning: Schema7.String,
1596
+ strengths: Schema7.Array(Schema7.String),
1597
+ weaknesses: Schema7.Array(Schema7.String),
1598
+ shouldExpand: Schema7.Boolean
1599
+ });
1600
+ var ComplexityAnalysisSchema = Schema7.Struct({
1601
+ score: Schema7.Number,
1602
+ factors: Schema7.Array(
1603
+ Schema7.Struct({
1604
+ factor: Schema7.String,
1605
+ weight: Schema7.Number,
1606
+ reasoning: Schema7.String
1607
+ })
1608
+ ),
1609
+ recommendedStrategy: Schema7.String,
1610
+ recommendedModel: Schema7.String
1611
+ });
1612
+
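// Usage sketch: pairing completeStructured with ReActActionSchema. The
// providers above append a JSON-only instruction and re-prompt on parse
// failures up to maxParseRetries times before failing with LLMParseError.
import { Effect } from "effect";

const nextStep = Effect.gen(function* () {
  const llm = yield* LLMService;
  const action = yield* llm.completeStructured({
    systemPrompt: "You are a ReAct agent.",
    messages: [{ role: "user", content: "Find the capital of France." }],
    outputSchema: ReActActionSchema,
    maxParseRetries: 2
  });
  return action.isComplete ? action.finalAnswer : action.action;
});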
1613
+ // src/runtime.ts
1614
+ import { Layer as Layer8 } from "effect";
1615
+ var createLLMProviderLayer = (provider = "anthropic", testResponses) => {
1616
+ if (provider === "test") {
1617
+ return Layer8.mergeAll(
1618
+ TestLLMServiceLayer(testResponses ?? {}),
1619
+ PromptManagerLive
1620
+ );
1621
+ }
1622
+ const configLayer = LLMConfigFromEnv;
1623
+ const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : LocalProviderLive;
1624
+ return Layer8.mergeAll(
1625
+ providerLayer.pipe(Layer8.provide(configLayer)),
1626
+ PromptManagerLive
1627
+ );
1628
+ };
1629
+ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
1630
+ const configLayer = Layer8.succeed(LLMConfig, config);
1631
+ const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : LocalProviderLive;
1632
+ return Layer8.mergeAll(
1633
+ providerLayer.pipe(Layer8.provide(configLayer)),
1634
+ PromptManagerLive
1635
+ );
1636
+ };
1637
+ export {
1638
+ AnthropicProviderLive,
1639
+ CacheControlSchema,
1640
+ CompletionResponseSchema,
1641
+ ComplexityAnalysisSchema,
1642
+ DefaultEmbeddingConfig,
1643
+ EmbeddingConfigSchema,
1644
+ GeminiProviderLive,
1645
+ ImageContentBlockSchema,
1646
+ ImageSourceSchema,
1647
+ LLMConfig,
1648
+ LLMConfigFromEnv,
1649
+ LLMContextOverflowError,
1650
+ LLMError,
1651
+ LLMParseError,
1652
+ LLMProviderType,
1653
+ LLMRateLimitError,
1654
+ LLMService,
1655
+ LLMTimeoutError,
1656
+ LocalProviderLive,
1657
+ ModelConfigSchema,
1658
+ ModelPresets,
1659
+ OpenAIProviderLive,
1660
+ PlanSchema,
1661
+ PromptManager,
1662
+ PromptManagerLive,
1663
+ ReActActionSchema,
1664
+ ReflectionSchema,
1665
+ StopReasonSchema,
1666
+ StrategySelectionSchema,
1667
+ TestLLMService,
1668
+ TestLLMServiceLayer,
1669
+ TextContentBlockSchema,
1670
+ ThoughtEvaluationSchema,
1671
+ TokenUsageSchema,
1672
+ ToolCallSchema,
1673
+ ToolDefinitionSchema,
1674
+ ToolResultContentBlockSchema,
1675
+ ToolUseContentBlockSchema,
1676
+ calculateCost,
1677
+ createLLMProviderLayer,
1678
+ createLLMProviderLayerWithConfig,
1679
+ estimateTokenCount,
1680
+ makeCacheable,
1681
+ retryPolicy
1682
+ };
1683
+ //# sourceMappingURL=index.js.map
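// Usage sketch: end-to-end wiring with the runtime helpers above. The
// env-based config layer expects the matching API key (here
// ANTHROPIC_API_KEY) to be present in the environment.
import { Effect } from "effect";

const program = Effect.gen(function* () {
  const llm = yield* LLMService;
  const response = yield* llm.complete({
    systemPrompt: "You are terse.",
    messages: [{ role: "user", content: "Say hello." }]
  });
  console.log(response.content, response.usage.estimatedCost);
});

Effect.runPromise(program.pipe(Effect.provide(createLLMProviderLayer("anthropic"))));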