@reactive-agents/llm-provider 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +108 -0
- package/dist/index.d.ts +542 -0
- package/dist/index.js +1683 -0
- package/dist/index.js.map +1 -0
- package/package.json +53 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1683 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
// src/types.ts
|
|
9
|
+
import { Schema } from "effect";
|
|
10
|
+
var LLMProviderType = Schema.Literal(
|
|
11
|
+
"anthropic",
|
|
12
|
+
"openai",
|
|
13
|
+
"ollama",
|
|
14
|
+
"gemini",
|
|
15
|
+
"custom"
|
|
16
|
+
);
|
|
17
|
+
var EmbeddingConfigSchema = Schema.Struct({
|
|
18
|
+
model: Schema.String,
|
|
19
|
+
dimensions: Schema.Number,
|
|
20
|
+
provider: Schema.Literal("openai", "ollama"),
|
|
21
|
+
batchSize: Schema.optional(Schema.Number)
|
|
22
|
+
});
|
|
23
|
+
var DefaultEmbeddingConfig = {
|
|
24
|
+
model: "text-embedding-3-small",
|
|
25
|
+
dimensions: 1536,
|
|
26
|
+
provider: "openai",
|
|
27
|
+
batchSize: 100
|
|
28
|
+
};
|
|
29
|
+
var ModelConfigSchema = Schema.Struct({
|
|
30
|
+
provider: LLMProviderType,
|
|
31
|
+
model: Schema.String,
|
|
32
|
+
maxTokens: Schema.optional(Schema.Number),
|
|
33
|
+
temperature: Schema.optional(Schema.Number),
|
|
34
|
+
topP: Schema.optional(Schema.Number),
|
|
35
|
+
stopSequences: Schema.optional(Schema.Array(Schema.String))
|
|
36
|
+
});
|
|
37
|
+
var ModelPresets = {
|
|
38
|
+
"claude-haiku": {
|
|
39
|
+
provider: "anthropic",
|
|
40
|
+
model: "claude-3-5-haiku-20241022",
|
|
41
|
+
costPer1MInput: 1,
|
|
42
|
+
costPer1MOutput: 5,
|
|
43
|
+
maxContext: 2e5,
|
|
44
|
+
quality: 0.6
|
|
45
|
+
},
|
|
46
|
+
"claude-sonnet": {
|
|
47
|
+
provider: "anthropic",
|
|
48
|
+
model: "claude-sonnet-4-20250514",
|
|
49
|
+
costPer1MInput: 3,
|
|
50
|
+
costPer1MOutput: 15,
|
|
51
|
+
maxContext: 2e5,
|
|
52
|
+
quality: 0.85
|
|
53
|
+
},
|
|
54
|
+
"claude-sonnet-4-5": {
|
|
55
|
+
provider: "anthropic",
|
|
56
|
+
model: "claude-sonnet-4-5-20250929",
|
|
57
|
+
costPer1MInput: 3,
|
|
58
|
+
costPer1MOutput: 15,
|
|
59
|
+
maxContext: 2e5,
|
|
60
|
+
quality: 0.9
|
|
61
|
+
},
|
|
62
|
+
"claude-opus": {
|
|
63
|
+
provider: "anthropic",
|
|
64
|
+
model: "claude-opus-4-20250514",
|
|
65
|
+
costPer1MInput: 15,
|
|
66
|
+
costPer1MOutput: 75,
|
|
67
|
+
maxContext: 1e6,
|
|
68
|
+
quality: 1
|
|
69
|
+
},
|
|
70
|
+
"gpt-4o-mini": {
|
|
71
|
+
provider: "openai",
|
|
72
|
+
model: "gpt-4o-mini",
|
|
73
|
+
costPer1MInput: 0.15,
|
|
74
|
+
costPer1MOutput: 0.6,
|
|
75
|
+
maxContext: 128e3,
|
|
76
|
+
quality: 0.55
|
|
77
|
+
},
|
|
78
|
+
"gpt-4o": {
|
|
79
|
+
provider: "openai",
|
|
80
|
+
model: "gpt-4o",
|
|
81
|
+
costPer1MInput: 2.5,
|
|
82
|
+
costPer1MOutput: 10,
|
|
83
|
+
maxContext: 128e3,
|
|
84
|
+
quality: 0.8
|
|
85
|
+
},
|
|
86
|
+
"gemini-2.0-flash": {
|
|
87
|
+
provider: "gemini",
|
|
88
|
+
model: "gemini-2.0-flash",
|
|
89
|
+
costPer1MInput: 0.1,
|
|
90
|
+
costPer1MOutput: 0.4,
|
|
91
|
+
maxContext: 1e6,
|
|
92
|
+
quality: 0.75
|
|
93
|
+
},
|
|
94
|
+
"gemini-2.5-pro": {
|
|
95
|
+
provider: "gemini",
|
|
96
|
+
model: "gemini-2.5-pro-preview-03-25",
|
|
97
|
+
costPer1MInput: 1.25,
|
|
98
|
+
costPer1MOutput: 10,
|
|
99
|
+
maxContext: 1e6,
|
|
100
|
+
quality: 0.95
|
|
101
|
+
}
|
|
102
|
+
};
|
|
103
|
+
var CacheControlSchema = Schema.Struct({
|
|
104
|
+
type: Schema.Literal("ephemeral")
|
|
105
|
+
});
|
|
106
|
+
var ImageSourceSchema = Schema.Struct({
|
|
107
|
+
type: Schema.Literal("base64", "url"),
|
|
108
|
+
media_type: Schema.Literal(
|
|
109
|
+
"image/png",
|
|
110
|
+
"image/jpeg",
|
|
111
|
+
"image/gif",
|
|
112
|
+
"image/webp"
|
|
113
|
+
),
|
|
114
|
+
data: Schema.String
|
|
115
|
+
});
|
|
116
|
+
var TextContentBlockSchema = Schema.Struct({
|
|
117
|
+
type: Schema.Literal("text"),
|
|
118
|
+
text: Schema.String,
|
|
119
|
+
cache_control: Schema.optional(CacheControlSchema)
|
|
120
|
+
});
|
|
121
|
+
var ImageContentBlockSchema = Schema.Struct({
|
|
122
|
+
type: Schema.Literal("image"),
|
|
123
|
+
source: ImageSourceSchema
|
|
124
|
+
});
|
|
125
|
+
var ToolUseContentBlockSchema = Schema.Struct({
|
|
126
|
+
type: Schema.Literal("tool_use"),
|
|
127
|
+
id: Schema.String,
|
|
128
|
+
name: Schema.String,
|
|
129
|
+
input: Schema.Unknown
|
|
130
|
+
});
|
|
131
|
+
var ToolResultContentBlockSchema = Schema.Struct({
|
|
132
|
+
type: Schema.Literal("tool_result"),
|
|
133
|
+
tool_use_id: Schema.String,
|
|
134
|
+
content: Schema.String
|
|
135
|
+
});
|
|
136
|
+
var makeCacheable = (text) => ({
|
|
137
|
+
type: "text",
|
|
138
|
+
text,
|
|
139
|
+
cache_control: { type: "ephemeral" }
|
|
140
|
+
});
|
|
141
|
+
var TokenUsageSchema = Schema.Struct({
|
|
142
|
+
inputTokens: Schema.Number,
|
|
143
|
+
outputTokens: Schema.Number,
|
|
144
|
+
totalTokens: Schema.Number,
|
|
145
|
+
estimatedCost: Schema.Number
|
|
146
|
+
});
|
|
147
|
+
var StopReasonSchema = Schema.Literal(
|
|
148
|
+
"end_turn",
|
|
149
|
+
"max_tokens",
|
|
150
|
+
"stop_sequence",
|
|
151
|
+
"tool_use"
|
|
152
|
+
);
|
|
153
|
+
var ToolDefinitionSchema = Schema.Struct({
|
|
154
|
+
name: Schema.String,
|
|
155
|
+
description: Schema.String,
|
|
156
|
+
inputSchema: Schema.Record({ key: Schema.String, value: Schema.Unknown })
|
|
157
|
+
});
|
|
158
|
+
var ToolCallSchema = Schema.Struct({
|
|
159
|
+
id: Schema.String,
|
|
160
|
+
name: Schema.String,
|
|
161
|
+
input: Schema.Unknown
|
|
162
|
+
});
|
|
163
|
+
var CompletionResponseSchema = Schema.Struct({
|
|
164
|
+
content: Schema.String,
|
|
165
|
+
stopReason: StopReasonSchema,
|
|
166
|
+
usage: TokenUsageSchema,
|
|
167
|
+
model: Schema.String,
|
|
168
|
+
toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
// src/errors.ts
|
|
172
|
+
import { Data } from "effect";
|
|
173
|
+
var LLMError = class extends Data.TaggedError("LLMError") {
|
|
174
|
+
};
|
|
175
|
+
var LLMRateLimitError = class extends Data.TaggedError("LLMRateLimitError") {
|
|
176
|
+
};
|
|
177
|
+
var LLMTimeoutError = class extends Data.TaggedError("LLMTimeoutError") {
|
|
178
|
+
};
|
|
179
|
+
var LLMParseError = class extends Data.TaggedError("LLMParseError") {
|
|
180
|
+
};
|
|
181
|
+
var LLMContextOverflowError = class extends Data.TaggedError(
|
|
182
|
+
"LLMContextOverflowError"
|
|
183
|
+
) {
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
// src/llm-service.ts
|
|
187
|
+
import { Context } from "effect";
|
|
188
|
+
var LLMService = class extends Context.Tag("LLMService")() {
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
// src/llm-config.ts
|
|
192
|
+
import { Context as Context2, Layer } from "effect";
|
|
193
|
+
var LLMConfig = class extends Context2.Tag("LLMConfig")() {
|
|
194
|
+
};
|
|
195
|
+
var LLMConfigFromEnv = Layer.succeed(
|
|
196
|
+
LLMConfig,
|
|
197
|
+
LLMConfig.of({
|
|
198
|
+
defaultProvider: "anthropic",
|
|
199
|
+
defaultModel: process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514",
|
|
200
|
+
anthropicApiKey: process.env.ANTHROPIC_API_KEY,
|
|
201
|
+
openaiApiKey: process.env.OPENAI_API_KEY,
|
|
202
|
+
googleApiKey: process.env.GOOGLE_API_KEY,
|
|
203
|
+
ollamaEndpoint: process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434",
|
|
204
|
+
embeddingConfig: {
|
|
205
|
+
model: process.env.EMBEDDING_MODEL ?? "text-embedding-3-small",
|
|
206
|
+
dimensions: Number(process.env.EMBEDDING_DIMENSIONS ?? 1536),
|
|
207
|
+
provider: process.env.EMBEDDING_PROVIDER ?? "openai",
|
|
208
|
+
batchSize: 100
|
|
209
|
+
},
|
|
210
|
+
supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL ?? "claude-sonnet-4-20250514").startsWith("claude"),
|
|
211
|
+
maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
|
|
212
|
+
timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
|
|
213
|
+
defaultMaxTokens: 4096,
|
|
214
|
+
defaultTemperature: Number(process.env.LLM_DEFAULT_TEMPERATURE ?? 0.7)
|
|
215
|
+
})
|
|
216
|
+
);
|
|
217
|
+
|
|
218
|
+
// src/prompt-manager.ts
|
|
219
|
+
import { Effect as Effect3, Context as Context3, Layer as Layer2 } from "effect";
|
|
220
|
+
|
|
221
|
+
// src/token-counter.ts
|
|
222
|
+
import { Effect as Effect2 } from "effect";
|
|
223
|
+
var estimateTokenCount = (messages) => Effect2.sync(() => {
|
|
224
|
+
let totalChars = 0;
|
|
225
|
+
for (const msg of messages) {
|
|
226
|
+
if (typeof msg.content === "string") {
|
|
227
|
+
totalChars += msg.content.length;
|
|
228
|
+
} else {
|
|
229
|
+
for (const block of msg.content) {
|
|
230
|
+
if (block.type === "text") {
|
|
231
|
+
totalChars += block.text.length;
|
|
232
|
+
} else if (block.type === "tool_result") {
|
|
233
|
+
totalChars += block.content.length;
|
|
234
|
+
} else if (block.type === "tool_use") {
|
|
235
|
+
totalChars += JSON.stringify(block.input).length;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
totalChars += 16;
|
|
240
|
+
}
|
|
241
|
+
return Math.ceil(totalChars / 4);
|
|
242
|
+
});
|
|
243
|
+
var calculateCost = (inputTokens, outputTokens, model) => {
|
|
244
|
+
const costMap = {
|
|
245
|
+
"claude-3-5-haiku-20241022": { input: 1, output: 5 },
|
|
246
|
+
"claude-sonnet-4-20250514": { input: 3, output: 15 },
|
|
247
|
+
"claude-sonnet-4-5-20250929": { input: 3, output: 15 },
|
|
248
|
+
"claude-opus-4-20250514": { input: 15, output: 75 },
|
|
249
|
+
"gpt-4o-mini": { input: 0.15, output: 0.6 },
|
|
250
|
+
"gpt-4o": { input: 2.5, output: 10 },
|
|
251
|
+
"gemini-2.0-flash": { input: 0.1, output: 0.4 },
|
|
252
|
+
"gemini-2.5-pro-preview-03-25": { input: 1.25, output: 10 },
|
|
253
|
+
"gemini-embedding-001": { input: 0, output: 0 }
|
|
254
|
+
};
|
|
255
|
+
const costs = costMap[model] ?? { input: 3, output: 15 };
|
|
256
|
+
return inputTokens / 1e6 * costs.input + outputTokens / 1e6 * costs.output;
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
// src/prompt-manager.ts
|
|
260
|
+
var PromptManager = class extends Context3.Tag("PromptManager")() {
|
|
261
|
+
};
|
|
262
|
+
var PromptManagerLive = Layer2.succeed(
|
|
263
|
+
PromptManager,
|
|
264
|
+
PromptManager.of({
|
|
265
|
+
buildPrompt: (options) => Effect3.gen(function* () {
|
|
266
|
+
const {
|
|
267
|
+
systemPrompt,
|
|
268
|
+
messages,
|
|
269
|
+
reserveOutputTokens,
|
|
270
|
+
maxContextTokens,
|
|
271
|
+
truncationStrategy
|
|
272
|
+
} = options;
|
|
273
|
+
const budget = maxContextTokens - reserveOutputTokens;
|
|
274
|
+
const systemMessage = {
|
|
275
|
+
role: "system",
|
|
276
|
+
content: systemPrompt
|
|
277
|
+
};
|
|
278
|
+
const systemTokens = yield* estimateTokenCount([systemMessage]);
|
|
279
|
+
if (systemTokens >= budget) {
|
|
280
|
+
return [systemMessage];
|
|
281
|
+
}
|
|
282
|
+
const remainingBudget = budget - systemTokens;
|
|
283
|
+
const truncated = yield* applyTruncation(
|
|
284
|
+
messages,
|
|
285
|
+
remainingBudget,
|
|
286
|
+
truncationStrategy
|
|
287
|
+
);
|
|
288
|
+
return [systemMessage, ...truncated];
|
|
289
|
+
}),
|
|
290
|
+
fitsInContext: (messages, maxTokens) => Effect3.gen(function* () {
|
|
291
|
+
const count = yield* estimateTokenCount(messages);
|
|
292
|
+
return count <= maxTokens;
|
|
293
|
+
})
|
|
294
|
+
})
|
|
295
|
+
);
|
|
296
|
+
var applyTruncation = (messages, budget, strategy) => Effect3.gen(function* () {
|
|
297
|
+
const totalTokens = yield* estimateTokenCount(messages);
|
|
298
|
+
if (totalTokens <= budget) {
|
|
299
|
+
return messages;
|
|
300
|
+
}
|
|
301
|
+
switch (strategy) {
|
|
302
|
+
case "drop-oldest": {
|
|
303
|
+
const result = [];
|
|
304
|
+
let usedTokens = 0;
|
|
305
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
306
|
+
const msgTokens = yield* estimateTokenCount([messages[i]]);
|
|
307
|
+
if (usedTokens + msgTokens <= budget) {
|
|
308
|
+
result.unshift(messages[i]);
|
|
309
|
+
usedTokens += msgTokens;
|
|
310
|
+
} else {
|
|
311
|
+
break;
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
return result;
|
|
315
|
+
}
|
|
316
|
+
case "sliding-window": {
|
|
317
|
+
const result = [];
|
|
318
|
+
let usedTokens = 0;
|
|
319
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
320
|
+
const msgTokens = yield* estimateTokenCount([messages[i]]);
|
|
321
|
+
if (usedTokens + msgTokens <= budget) {
|
|
322
|
+
result.unshift(messages[i]);
|
|
323
|
+
usedTokens += msgTokens;
|
|
324
|
+
} else {
|
|
325
|
+
break;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
return result;
|
|
329
|
+
}
|
|
330
|
+
case "summarize-middle":
|
|
331
|
+
case "importance-based": {
|
|
332
|
+
const result = [];
|
|
333
|
+
let usedTokens = 0;
|
|
334
|
+
if (messages.length > 0) {
|
|
335
|
+
const firstTokens = yield* estimateTokenCount([messages[0]]);
|
|
336
|
+
if (firstTokens <= budget) {
|
|
337
|
+
result.push(messages[0]);
|
|
338
|
+
usedTokens += firstTokens;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
const tail = [];
|
|
342
|
+
for (let i = messages.length - 1; i >= 1; i--) {
|
|
343
|
+
const msgTokens = yield* estimateTokenCount([messages[i]]);
|
|
344
|
+
if (usedTokens + msgTokens <= budget) {
|
|
345
|
+
tail.unshift(messages[i]);
|
|
346
|
+
usedTokens += msgTokens;
|
|
347
|
+
} else {
|
|
348
|
+
break;
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
return [...result, ...tail];
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
});
|
|
355
|
+
|
|
356
|
+
// src/providers/anthropic.ts
|
|
357
|
+
import { Effect as Effect4, Layer as Layer3, Stream, Schema as Schema2 } from "effect";
|
|
358
|
+
|
|
359
|
+
// src/retry.ts
|
|
360
|
+
import { Schedule } from "effect";
|
|
361
|
+
var retryPolicy = Schedule.intersect(
|
|
362
|
+
Schedule.recurs(3),
|
|
363
|
+
Schedule.exponential("1 second", 2)
|
|
364
|
+
).pipe(
|
|
365
|
+
Schedule.whileInput(
|
|
366
|
+
(error) => error._tag === "LLMRateLimitError" || error._tag === "LLMTimeoutError"
|
|
367
|
+
)
|
|
368
|
+
);
|
|
369
|
+
|
|
370
|
+
// src/providers/anthropic.ts
|
|
371
|
+
var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "system").map((m) => ({
|
|
372
|
+
role: m.role,
|
|
373
|
+
content: typeof m.content === "string" ? m.content : m.content.map(
|
|
374
|
+
(b) => b
|
|
375
|
+
)
|
|
376
|
+
}));
|
|
377
|
+
var toAnthropicTool = (tool) => ({
|
|
378
|
+
name: tool.name,
|
|
379
|
+
description: tool.description,
|
|
380
|
+
input_schema: {
|
|
381
|
+
type: "object",
|
|
382
|
+
...tool.inputSchema
|
|
383
|
+
}
|
|
384
|
+
});
|
|
385
|
+
var toEffectError = (error, provider) => {
|
|
386
|
+
const err = error;
|
|
387
|
+
if (err.status === 429) {
|
|
388
|
+
const retryAfter = err.headers?.["retry-after"];
|
|
389
|
+
return new LLMRateLimitError({
|
|
390
|
+
message: err.message ?? "Rate limit exceeded",
|
|
391
|
+
provider,
|
|
392
|
+
retryAfterMs: retryAfter ? Number(retryAfter) * 1e3 : 6e4
|
|
393
|
+
});
|
|
394
|
+
}
|
|
395
|
+
return new LLMError({
|
|
396
|
+
message: err.message ?? String(error),
|
|
397
|
+
provider,
|
|
398
|
+
cause: error
|
|
399
|
+
});
|
|
400
|
+
};
|
|
401
|
+
var AnthropicProviderLive = Layer3.effect(
|
|
402
|
+
LLMService,
|
|
403
|
+
Effect4.gen(function* () {
|
|
404
|
+
const config = yield* LLMConfig;
|
|
405
|
+
const createClient = () => {
|
|
406
|
+
const Anthropic = __require("@anthropic-ai/sdk").default;
|
|
407
|
+
return new Anthropic({ apiKey: config.anthropicApiKey });
|
|
408
|
+
};
|
|
409
|
+
let _client = null;
|
|
410
|
+
const getClient = () => {
|
|
411
|
+
if (!_client) _client = createClient();
|
|
412
|
+
return _client;
|
|
413
|
+
};
|
|
414
|
+
return LLMService.of({
|
|
415
|
+
complete: (request) => Effect4.gen(function* () {
|
|
416
|
+
const client = getClient();
|
|
417
|
+
const model = request.model?.model ?? config.defaultModel;
|
|
418
|
+
const response = yield* Effect4.tryPromise({
|
|
419
|
+
try: () => client.messages.create({
|
|
420
|
+
model,
|
|
421
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
422
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
423
|
+
system: request.systemPrompt,
|
|
424
|
+
messages: toAnthropicMessages(request.messages),
|
|
425
|
+
stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
|
|
426
|
+
tools: request.tools?.map(toAnthropicTool)
|
|
427
|
+
}),
|
|
428
|
+
catch: (error) => toEffectError(error, "anthropic")
|
|
429
|
+
});
|
|
430
|
+
return mapAnthropicResponse(response, model);
|
|
431
|
+
}).pipe(
|
|
432
|
+
Effect4.retry(retryPolicy),
|
|
433
|
+
Effect4.timeout("30 seconds"),
|
|
434
|
+
Effect4.catchTag(
|
|
435
|
+
"TimeoutException",
|
|
436
|
+
() => Effect4.fail(
|
|
437
|
+
new LLMTimeoutError({
|
|
438
|
+
message: "LLM request timed out",
|
|
439
|
+
provider: "anthropic",
|
|
440
|
+
timeoutMs: 3e4
|
|
441
|
+
})
|
|
442
|
+
)
|
|
443
|
+
)
|
|
444
|
+
),
|
|
445
|
+
stream: (request) => Effect4.gen(function* () {
|
|
446
|
+
const client = getClient();
|
|
447
|
+
const model = request.model?.model ?? config.defaultModel;
|
|
448
|
+
return Stream.async((emit) => {
|
|
449
|
+
const stream = client.messages.stream({
|
|
450
|
+
model,
|
|
451
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
452
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
453
|
+
system: request.systemPrompt,
|
|
454
|
+
messages: toAnthropicMessages(request.messages)
|
|
455
|
+
});
|
|
456
|
+
stream.on("text", (text) => {
|
|
457
|
+
emit.single({ type: "text_delta", text });
|
|
458
|
+
});
|
|
459
|
+
stream.on("finalMessage", (message) => {
|
|
460
|
+
const msg = message;
|
|
461
|
+
const content = msg.content.filter(
|
|
462
|
+
(b) => b.type === "text"
|
|
463
|
+
).map((b) => b.text).join("");
|
|
464
|
+
emit.single({ type: "content_complete", content });
|
|
465
|
+
emit.single({
|
|
466
|
+
type: "usage",
|
|
467
|
+
usage: {
|
|
468
|
+
inputTokens: msg.usage.input_tokens,
|
|
469
|
+
outputTokens: msg.usage.output_tokens,
|
|
470
|
+
totalTokens: msg.usage.input_tokens + msg.usage.output_tokens,
|
|
471
|
+
estimatedCost: calculateCost(
|
|
472
|
+
msg.usage.input_tokens,
|
|
473
|
+
msg.usage.output_tokens,
|
|
474
|
+
model
|
|
475
|
+
)
|
|
476
|
+
}
|
|
477
|
+
});
|
|
478
|
+
emit.end();
|
|
479
|
+
});
|
|
480
|
+
stream.on("error", (error) => {
|
|
481
|
+
const err = error;
|
|
482
|
+
emit.fail(
|
|
483
|
+
new LLMError({
|
|
484
|
+
message: err.message ?? String(error),
|
|
485
|
+
provider: "anthropic",
|
|
486
|
+
cause: error
|
|
487
|
+
})
|
|
488
|
+
);
|
|
489
|
+
});
|
|
490
|
+
});
|
|
491
|
+
}),
|
|
492
|
+
completeStructured: (request) => Effect4.gen(function* () {
|
|
493
|
+
const schemaStr = JSON.stringify(
|
|
494
|
+
Schema2.encodedSchema(request.outputSchema),
|
|
495
|
+
null,
|
|
496
|
+
2
|
|
497
|
+
);
|
|
498
|
+
const messagesWithFormat = [
|
|
499
|
+
...request.messages,
|
|
500
|
+
{
|
|
501
|
+
role: "user",
|
|
502
|
+
content: `
|
|
503
|
+
Respond with ONLY valid JSON matching this schema:
|
|
504
|
+
${schemaStr}
|
|
505
|
+
|
|
506
|
+
No markdown, no code fences, just raw JSON.`
|
|
507
|
+
}
|
|
508
|
+
];
|
|
509
|
+
let lastError = null;
|
|
510
|
+
const maxRetries = request.maxParseRetries ?? 2;
|
|
511
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
512
|
+
const msgs = attempt === 0 ? messagesWithFormat : [
|
|
513
|
+
...messagesWithFormat,
|
|
514
|
+
{
|
|
515
|
+
role: "assistant",
|
|
516
|
+
content: String(lastError)
|
|
517
|
+
},
|
|
518
|
+
{
|
|
519
|
+
role: "user",
|
|
520
|
+
content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
|
|
521
|
+
}
|
|
522
|
+
];
|
|
523
|
+
const completeResult = yield* Effect4.tryPromise({
|
|
524
|
+
try: () => {
|
|
525
|
+
const client = getClient();
|
|
526
|
+
return client.messages.create({
|
|
527
|
+
model: request.model?.model ?? config.defaultModel,
|
|
528
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
529
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
530
|
+
system: request.systemPrompt,
|
|
531
|
+
messages: toAnthropicMessages(msgs)
|
|
532
|
+
});
|
|
533
|
+
},
|
|
534
|
+
catch: (error) => toEffectError(error, "anthropic")
|
|
535
|
+
});
|
|
536
|
+
const response = mapAnthropicResponse(
|
|
537
|
+
completeResult,
|
|
538
|
+
request.model?.model ?? config.defaultModel
|
|
539
|
+
);
|
|
540
|
+
try {
|
|
541
|
+
const parsed = JSON.parse(response.content);
|
|
542
|
+
const decoded = Schema2.decodeUnknownEither(
|
|
543
|
+
request.outputSchema
|
|
544
|
+
)(parsed);
|
|
545
|
+
if (decoded._tag === "Right") {
|
|
546
|
+
return decoded.right;
|
|
547
|
+
}
|
|
548
|
+
lastError = decoded.left;
|
|
549
|
+
} catch (e) {
|
|
550
|
+
lastError = e;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
return yield* Effect4.fail(
|
|
554
|
+
new LLMParseError({
|
|
555
|
+
message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
|
|
556
|
+
rawOutput: String(lastError),
|
|
557
|
+
expectedSchema: schemaStr
|
|
558
|
+
})
|
|
559
|
+
);
|
|
560
|
+
}),
|
|
561
|
+
embed: (texts, model) => Effect4.tryPromise({
|
|
562
|
+
try: async () => {
|
|
563
|
+
const embeddingModel = model ?? config.embeddingConfig.model;
|
|
564
|
+
const embProvider = config.embeddingConfig.provider;
|
|
565
|
+
if (embProvider === "openai") {
|
|
566
|
+
const { default: OpenAI } = await import("openai");
|
|
567
|
+
const openaiClient = new OpenAI({
|
|
568
|
+
apiKey: config.openaiApiKey
|
|
569
|
+
});
|
|
570
|
+
const batchSize = config.embeddingConfig.batchSize ?? 100;
|
|
571
|
+
const results = [];
|
|
572
|
+
for (let i = 0; i < texts.length; i += batchSize) {
|
|
573
|
+
const batch = texts.slice(i, i + batchSize);
|
|
574
|
+
const response = await openaiClient.embeddings.create({
|
|
575
|
+
model: embeddingModel,
|
|
576
|
+
input: [...batch],
|
|
577
|
+
dimensions: config.embeddingConfig.dimensions
|
|
578
|
+
});
|
|
579
|
+
results.push(
|
|
580
|
+
...response.data.map(
|
|
581
|
+
(d) => d.embedding
|
|
582
|
+
)
|
|
583
|
+
);
|
|
584
|
+
}
|
|
585
|
+
return results;
|
|
586
|
+
}
|
|
587
|
+
const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
|
|
588
|
+
return Promise.all(
|
|
589
|
+
[...texts].map(async (text) => {
|
|
590
|
+
const res = await fetch(`${endpoint}/api/embed`, {
|
|
591
|
+
method: "POST",
|
|
592
|
+
headers: { "Content-Type": "application/json" },
|
|
593
|
+
body: JSON.stringify({
|
|
594
|
+
model: embeddingModel,
|
|
595
|
+
input: text
|
|
596
|
+
})
|
|
597
|
+
});
|
|
598
|
+
const data = await res.json();
|
|
599
|
+
return data.embeddings[0];
|
|
600
|
+
})
|
|
601
|
+
);
|
|
602
|
+
},
|
|
603
|
+
catch: (error) => new LLMError({
|
|
604
|
+
message: `Embedding failed: ${error}`,
|
|
605
|
+
provider: "anthropic",
|
|
606
|
+
cause: error
|
|
607
|
+
})
|
|
608
|
+
}),
|
|
609
|
+
countTokens: (messages) => Effect4.gen(function* () {
|
|
610
|
+
return yield* estimateTokenCount(messages);
|
|
611
|
+
}),
|
|
612
|
+
getModelConfig: () => Effect4.succeed({
|
|
613
|
+
provider: "anthropic",
|
|
614
|
+
model: config.defaultModel
|
|
615
|
+
})
|
|
616
|
+
});
|
|
617
|
+
})
|
|
618
|
+
);
|
|
619
|
+
var mapAnthropicResponse = (response, model) => {
|
|
620
|
+
const textContent = response.content.filter(
|
|
621
|
+
(b) => b.type === "text"
|
|
622
|
+
).map((b) => b.text).join("");
|
|
623
|
+
const toolCalls = response.content.filter(
|
|
624
|
+
(b) => b.type === "tool_use"
|
|
625
|
+
).map((b) => ({
|
|
626
|
+
id: b.id,
|
|
627
|
+
name: b.name,
|
|
628
|
+
input: b.input
|
|
629
|
+
}));
|
|
630
|
+
const stopReason = response.stop_reason === "end_turn" ? "end_turn" : response.stop_reason === "max_tokens" ? "max_tokens" : response.stop_reason === "stop_sequence" ? "stop_sequence" : response.stop_reason === "tool_use" ? "tool_use" : "end_turn";
|
|
631
|
+
return {
|
|
632
|
+
content: textContent,
|
|
633
|
+
stopReason,
|
|
634
|
+
usage: {
|
|
635
|
+
inputTokens: response.usage.input_tokens,
|
|
636
|
+
outputTokens: response.usage.output_tokens,
|
|
637
|
+
totalTokens: response.usage.input_tokens + response.usage.output_tokens,
|
|
638
|
+
estimatedCost: calculateCost(
|
|
639
|
+
response.usage.input_tokens,
|
|
640
|
+
response.usage.output_tokens,
|
|
641
|
+
model
|
|
642
|
+
)
|
|
643
|
+
},
|
|
644
|
+
model: response.model ?? model,
|
|
645
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
646
|
+
};
|
|
647
|
+
};
|
|
648
|
+
|
|
649
|
+
// src/providers/openai.ts
|
|
650
|
+
import { Effect as Effect5, Layer as Layer4, Stream as Stream2, Schema as Schema3 } from "effect";
|
|
651
|
+
var toOpenAIMessages = (messages) => messages.map((m) => ({
|
|
652
|
+
role: m.role,
|
|
653
|
+
content: typeof m.content === "string" ? m.content : m.content.filter(
|
|
654
|
+
(b) => b.type === "text"
|
|
655
|
+
).map((b) => b.text).join("")
|
|
656
|
+
}));
|
|
657
|
+
var toEffectError2 = (error, provider) => {
|
|
658
|
+
const err = error;
|
|
659
|
+
if (err.status === 429) {
|
|
660
|
+
return new LLMRateLimitError({
|
|
661
|
+
message: err.message ?? "Rate limit exceeded",
|
|
662
|
+
provider,
|
|
663
|
+
retryAfterMs: 6e4
|
|
664
|
+
});
|
|
665
|
+
}
|
|
666
|
+
return new LLMError({
|
|
667
|
+
message: err.message ?? String(error),
|
|
668
|
+
provider,
|
|
669
|
+
cause: error
|
|
670
|
+
});
|
|
671
|
+
};
|
|
672
|
+
var OpenAIProviderLive = Layer4.effect(
|
|
673
|
+
LLMService,
|
|
674
|
+
Effect5.gen(function* () {
|
|
675
|
+
const config = yield* LLMConfig;
|
|
676
|
+
const createClient = () => {
|
|
677
|
+
const OpenAI = __require("openai").default;
|
|
678
|
+
return new OpenAI({ apiKey: config.openaiApiKey });
|
|
679
|
+
};
|
|
680
|
+
let _client = null;
|
|
681
|
+
const getClient = () => {
|
|
682
|
+
if (!_client) _client = createClient();
|
|
683
|
+
return _client;
|
|
684
|
+
};
|
|
685
|
+
const defaultModel = config.defaultModel.startsWith("claude") ? "gpt-4o" : config.defaultModel;
|
|
686
|
+
return LLMService.of({
|
|
687
|
+
complete: (request) => Effect5.gen(function* () {
|
|
688
|
+
const client = getClient();
|
|
689
|
+
const model = request.model?.model ?? defaultModel;
|
|
690
|
+
const response = yield* Effect5.tryPromise({
|
|
691
|
+
try: () => client.chat.completions.create({
|
|
692
|
+
model,
|
|
693
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
694
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
695
|
+
messages: toOpenAIMessages(request.messages),
|
|
696
|
+
stop: request.stopSequences ? [...request.stopSequences] : void 0
|
|
697
|
+
}),
|
|
698
|
+
catch: (error) => toEffectError2(error, "openai")
|
|
699
|
+
});
|
|
700
|
+
return mapOpenAIResponse(response, model);
|
|
701
|
+
}).pipe(
|
|
702
|
+
Effect5.retry(retryPolicy),
|
|
703
|
+
Effect5.timeout("30 seconds"),
|
|
704
|
+
Effect5.catchTag(
|
|
705
|
+
"TimeoutException",
|
|
706
|
+
() => Effect5.fail(
|
|
707
|
+
new LLMTimeoutError({
|
|
708
|
+
message: "LLM request timed out",
|
|
709
|
+
provider: "openai",
|
|
710
|
+
timeoutMs: 3e4
|
|
711
|
+
})
|
|
712
|
+
)
|
|
713
|
+
)
|
|
714
|
+
),
|
|
715
|
+
stream: (request) => Effect5.gen(function* () {
|
|
716
|
+
const client = getClient();
|
|
717
|
+
const model = request.model?.model ?? defaultModel;
|
|
718
|
+
return Stream2.async((emit) => {
|
|
719
|
+
const doStream = async () => {
|
|
720
|
+
try {
|
|
721
|
+
const stream = await client.chat.completions.create({
|
|
722
|
+
model,
|
|
723
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
724
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
725
|
+
messages: toOpenAIMessages(request.messages),
|
|
726
|
+
stream: true
|
|
727
|
+
});
|
|
728
|
+
let fullContent = "";
|
|
729
|
+
for await (const chunk of stream) {
|
|
730
|
+
const delta = chunk.choices[0]?.delta?.content;
|
|
731
|
+
if (delta) {
|
|
732
|
+
fullContent += delta;
|
|
733
|
+
emit.single({ type: "text_delta", text: delta });
|
|
734
|
+
}
|
|
735
|
+
if (chunk.choices[0]?.finish_reason) {
|
|
736
|
+
emit.single({
|
|
737
|
+
type: "content_complete",
|
|
738
|
+
content: fullContent
|
|
739
|
+
});
|
|
740
|
+
const inputTokens = chunk.usage?.prompt_tokens ?? 0;
|
|
741
|
+
const outputTokens = chunk.usage?.completion_tokens ?? 0;
|
|
742
|
+
emit.single({
|
|
743
|
+
type: "usage",
|
|
744
|
+
usage: {
|
|
745
|
+
inputTokens,
|
|
746
|
+
outputTokens,
|
|
747
|
+
totalTokens: inputTokens + outputTokens,
|
|
748
|
+
estimatedCost: calculateCost(
|
|
749
|
+
inputTokens,
|
|
750
|
+
outputTokens,
|
|
751
|
+
model
|
|
752
|
+
)
|
|
753
|
+
}
|
|
754
|
+
});
|
|
755
|
+
emit.end();
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
} catch (error) {
|
|
759
|
+
const err = error;
|
|
760
|
+
emit.fail(
|
|
761
|
+
new LLMError({
|
|
762
|
+
message: err.message ?? String(error),
|
|
763
|
+
provider: "openai",
|
|
764
|
+
cause: error
|
|
765
|
+
})
|
|
766
|
+
);
|
|
767
|
+
}
|
|
768
|
+
};
|
|
769
|
+
void doStream();
|
|
770
|
+
});
|
|
771
|
+
}),
|
|
772
|
+
completeStructured: (request) => Effect5.gen(function* () {
|
|
773
|
+
const schemaStr = JSON.stringify(
|
|
774
|
+
Schema3.encodedSchema(request.outputSchema),
|
|
775
|
+
null,
|
|
776
|
+
2
|
|
777
|
+
);
|
|
778
|
+
const messagesWithFormat = [
|
|
779
|
+
...request.messages,
|
|
780
|
+
{
|
|
781
|
+
role: "user",
|
|
782
|
+
content: `
|
|
783
|
+
Respond with ONLY valid JSON matching this schema:
|
|
784
|
+
${schemaStr}
|
|
785
|
+
|
|
786
|
+
No markdown, no code fences, just raw JSON.`
|
|
787
|
+
}
|
|
788
|
+
];
|
|
789
|
+
let lastError = null;
|
|
790
|
+
const maxRetries = request.maxParseRetries ?? 2;
|
|
791
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
792
|
+
const msgs = attempt === 0 ? messagesWithFormat : [
|
|
793
|
+
...messagesWithFormat,
|
|
794
|
+
{
|
|
795
|
+
role: "assistant",
|
|
796
|
+
content: String(lastError)
|
|
797
|
+
},
|
|
798
|
+
{
|
|
799
|
+
role: "user",
|
|
800
|
+
content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
|
|
801
|
+
}
|
|
802
|
+
];
|
|
803
|
+
const client = getClient();
|
|
804
|
+
const completeResult = yield* Effect5.tryPromise({
|
|
805
|
+
try: () => client.chat.completions.create({
|
|
806
|
+
model: request.model?.model ?? defaultModel,
|
|
807
|
+
max_tokens: request.maxTokens ?? config.defaultMaxTokens,
|
|
808
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
809
|
+
messages: toOpenAIMessages(msgs)
|
|
810
|
+
}),
|
|
811
|
+
catch: (error) => toEffectError2(error, "openai")
|
|
812
|
+
});
|
|
813
|
+
const response = mapOpenAIResponse(
|
|
814
|
+
completeResult,
|
|
815
|
+
request.model?.model ?? defaultModel
|
|
816
|
+
);
|
|
817
|
+
try {
|
|
818
|
+
const parsed = JSON.parse(response.content);
|
|
819
|
+
const decoded = Schema3.decodeUnknownEither(
|
|
820
|
+
request.outputSchema
|
|
821
|
+
)(parsed);
|
|
822
|
+
if (decoded._tag === "Right") {
|
|
823
|
+
return decoded.right;
|
|
824
|
+
}
|
|
825
|
+
lastError = decoded.left;
|
|
826
|
+
} catch (e) {
|
|
827
|
+
lastError = e;
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
return yield* Effect5.fail(
|
|
831
|
+
new LLMParseError({
|
|
832
|
+
message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
|
|
833
|
+
rawOutput: String(lastError),
|
|
834
|
+
expectedSchema: schemaStr
|
|
835
|
+
})
|
|
836
|
+
);
|
|
837
|
+
}),
|
|
838
|
+
embed: (texts, model) => Effect5.tryPromise({
|
|
839
|
+
try: async () => {
|
|
840
|
+
const client = getClient();
|
|
841
|
+
const embeddingModel = model ?? config.embeddingConfig.model;
|
|
842
|
+
const batchSize = config.embeddingConfig.batchSize ?? 100;
|
|
843
|
+
const results = [];
|
|
844
|
+
for (let i = 0; i < texts.length; i += batchSize) {
|
|
845
|
+
const batch = texts.slice(i, i + batchSize);
|
|
846
|
+
const response = await client.embeddings.create({
|
|
847
|
+
model: embeddingModel,
|
|
848
|
+
input: [...batch],
|
|
849
|
+
dimensions: config.embeddingConfig.dimensions
|
|
850
|
+
});
|
|
851
|
+
results.push(
|
|
852
|
+
...response.data.map(
|
|
853
|
+
(d) => d.embedding
|
|
854
|
+
)
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
return results;
|
|
858
|
+
},
|
|
859
|
+
catch: (error) => new LLMError({
|
|
860
|
+
message: `Embedding failed: ${error}`,
|
|
861
|
+
provider: "openai",
|
|
862
|
+
cause: error
|
|
863
|
+
})
|
|
864
|
+
}),
|
|
865
|
+
countTokens: (messages) => Effect5.gen(function* () {
|
|
866
|
+
return yield* estimateTokenCount(messages);
|
|
867
|
+
}),
|
|
868
|
+
getModelConfig: () => Effect5.succeed({
|
|
869
|
+
provider: "openai",
|
|
870
|
+
model: defaultModel
|
|
871
|
+
})
|
|
872
|
+
});
|
|
873
|
+
})
|
|
874
|
+
);
|
|
875
|
+
var mapOpenAIResponse = (response, model) => {
|
|
876
|
+
const content = response.choices[0]?.message?.content ?? "";
|
|
877
|
+
const stopReason = response.choices[0]?.finish_reason === "stop" ? "end_turn" : response.choices[0]?.finish_reason === "length" ? "max_tokens" : "end_turn";
|
|
878
|
+
return {
|
|
879
|
+
content,
|
|
880
|
+
stopReason,
|
|
881
|
+
usage: {
|
|
882
|
+
inputTokens: response.usage?.prompt_tokens ?? 0,
|
|
883
|
+
outputTokens: response.usage?.completion_tokens ?? 0,
|
|
884
|
+
totalTokens: response.usage?.total_tokens ?? 0,
|
|
885
|
+
estimatedCost: calculateCost(
|
|
886
|
+
response.usage?.prompt_tokens ?? 0,
|
|
887
|
+
response.usage?.completion_tokens ?? 0,
|
|
888
|
+
model
|
|
889
|
+
)
|
|
890
|
+
},
|
|
891
|
+
model: response.model ?? model
|
|
892
|
+
};
|
|
893
|
+
};
|
|
894
|
+
|
|
895
|
+
// src/providers/local.ts
|
|
896
|
+
import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
|
|
897
|
+
var toOllamaMessages = (messages) => messages.map((m) => ({
|
|
898
|
+
role: m.role,
|
|
899
|
+
content: typeof m.content === "string" ? m.content : m.content.filter(
|
|
900
|
+
(b) => b.type === "text"
|
|
901
|
+
).map((b) => b.text).join("")
|
|
902
|
+
}));
|
|
903
|
+
var LocalProviderLive = Layer5.effect(
|
|
904
|
+
LLMService,
|
|
905
|
+
Effect6.gen(function* () {
|
|
906
|
+
const config = yield* LLMConfig;
|
|
907
|
+
const endpoint = config.ollamaEndpoint ?? "http://localhost:11434";
|
|
908
|
+
const defaultModel = config.defaultModel.startsWith("claude") || config.defaultModel.startsWith("gpt") ? "llama3" : config.defaultModel;
|
|
909
|
+
return LLMService.of({
|
|
910
|
+
complete: (request) => Effect6.gen(function* () {
|
|
911
|
+
const model = request.model?.model ?? defaultModel;
|
|
912
|
+
const response = yield* Effect6.tryPromise({
|
|
913
|
+
try: async () => {
|
|
914
|
+
const res = await fetch(`${endpoint}/api/chat`, {
|
|
915
|
+
method: "POST",
|
|
916
|
+
headers: { "Content-Type": "application/json" },
|
|
917
|
+
body: JSON.stringify({
|
|
918
|
+
model,
|
|
919
|
+
messages: toOllamaMessages(request.messages),
|
|
920
|
+
stream: false,
|
|
921
|
+
options: {
|
|
922
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
923
|
+
num_predict: request.maxTokens ?? config.defaultMaxTokens,
|
|
924
|
+
stop: request.stopSequences ? [...request.stopSequences] : void 0
|
|
925
|
+
}
|
|
926
|
+
})
|
|
927
|
+
});
|
|
928
|
+
if (!res.ok) {
|
|
929
|
+
throw new Error(
|
|
930
|
+
`Ollama request failed: ${res.status} ${res.statusText}`
|
|
931
|
+
);
|
|
932
|
+
}
|
|
933
|
+
return await res.json();
|
|
934
|
+
},
|
|
935
|
+
catch: (error) => new LLMError({
|
|
936
|
+
message: `Ollama request failed: ${error}`,
|
|
937
|
+
provider: "ollama",
|
|
938
|
+
cause: error
|
|
939
|
+
})
|
|
940
|
+
});
|
|
941
|
+
const content = response.message?.content ?? "";
|
|
942
|
+
const inputTokens = response.prompt_eval_count ?? 0;
|
|
943
|
+
const outputTokens = response.eval_count ?? 0;
|
|
944
|
+
return {
|
|
945
|
+
content,
|
|
946
|
+
stopReason: response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
|
|
947
|
+
usage: {
|
|
948
|
+
inputTokens,
|
|
949
|
+
outputTokens,
|
|
950
|
+
totalTokens: inputTokens + outputTokens,
|
|
951
|
+
estimatedCost: 0
|
|
952
|
+
// Local models are free
|
|
953
|
+
},
|
|
954
|
+
model: response.model ?? model
|
|
955
|
+
};
|
|
956
|
+
}).pipe(
|
|
957
|
+
Effect6.retry(retryPolicy),
|
|
958
|
+
Effect6.timeout("60 seconds"),
|
|
959
|
+
Effect6.catchTag(
|
|
960
|
+
"TimeoutException",
|
|
961
|
+
() => Effect6.fail(
|
|
962
|
+
new LLMTimeoutError({
|
|
963
|
+
message: "Local LLM request timed out",
|
|
964
|
+
provider: "ollama",
|
|
965
|
+
timeoutMs: 6e4
|
|
966
|
+
})
|
|
967
|
+
)
|
|
968
|
+
)
|
|
969
|
+
),
|
|
970
|
+
stream: (request) => Effect6.gen(function* () {
|
|
971
|
+
const model = request.model?.model ?? defaultModel;
|
|
972
|
+
return Stream3.async((emit) => {
|
|
973
|
+
const doStream = async () => {
|
|
974
|
+
try {
|
|
975
|
+
const res = await fetch(`${endpoint}/api/chat`, {
|
|
976
|
+
method: "POST",
|
|
977
|
+
headers: { "Content-Type": "application/json" },
|
|
978
|
+
body: JSON.stringify({
|
|
979
|
+
model,
|
|
980
|
+
messages: toOllamaMessages(request.messages),
|
|
981
|
+
stream: true,
|
|
982
|
+
options: {
|
|
983
|
+
temperature: request.temperature ?? config.defaultTemperature,
|
|
984
|
+
num_predict: request.maxTokens ?? config.defaultMaxTokens
|
|
985
|
+
}
|
|
986
|
+
})
|
|
987
|
+
});
|
|
988
|
+
if (!res.ok || !res.body) {
|
|
989
|
+
throw new Error(`Ollama stream failed: ${res.status}`);
|
|
990
|
+
}
|
|
991
|
+
const reader = res.body.getReader();
|
|
992
|
+
const decoder = new TextDecoder();
|
|
993
|
+
let fullContent = "";
|
|
994
|
+
while (true) {
|
|
995
|
+
const { done, value } = await reader.read();
|
|
996
|
+
if (done) break;
|
|
997
|
+
const lines = decoder.decode(value, { stream: true }).split("\n").filter(Boolean);
|
|
998
|
+
for (const line of lines) {
|
|
999
|
+
const parsed = JSON.parse(line);
|
|
1000
|
+
if (parsed.message?.content) {
|
|
1001
|
+
fullContent += parsed.message.content;
|
|
1002
|
+
emit.single({
|
|
1003
|
+
type: "text_delta",
|
|
1004
|
+
text: parsed.message.content
|
|
1005
|
+
});
|
|
1006
|
+
}
|
|
1007
|
+
if (parsed.done) {
|
|
1008
|
+
emit.single({
|
|
1009
|
+
type: "content_complete",
|
|
1010
|
+
content: fullContent
|
|
1011
|
+
});
|
|
1012
|
+
emit.single({
|
|
1013
|
+
type: "usage",
|
|
1014
|
+
usage: {
|
|
1015
|
+
inputTokens: parsed.prompt_eval_count ?? 0,
|
|
1016
|
+
outputTokens: parsed.eval_count ?? 0,
|
|
1017
|
+
totalTokens: (parsed.prompt_eval_count ?? 0) + (parsed.eval_count ?? 0),
|
|
1018
|
+
estimatedCost: 0
|
|
1019
|
+
}
|
|
1020
|
+
});
|
|
1021
|
+
emit.end();
|
|
1022
|
+
}
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
1025
|
+
} catch (error) {
|
|
1026
|
+
const err = error;
|
|
1027
|
+
emit.fail(
|
|
1028
|
+
new LLMError({
|
|
1029
|
+
message: err.message ?? String(error),
|
|
1030
|
+
provider: "ollama",
|
|
1031
|
+
cause: error
|
|
1032
|
+
})
|
|
1033
|
+
);
|
|
1034
|
+
}
|
|
1035
|
+
};
|
|
1036
|
+
void doStream();
|
|
1037
|
+
});
|
|
1038
|
+
}),
|
|
1039
|
+
completeStructured: (request) => Effect6.gen(function* () {
|
|
1040
|
+
const schemaStr = JSON.stringify(
|
|
1041
|
+
Schema4.encodedSchema(request.outputSchema),
|
|
1042
|
+
null,
|
|
1043
|
+
2
|
|
1044
|
+
);
|
|
1045
|
+
const messagesWithFormat = [
|
|
1046
|
+
...request.messages,
|
|
1047
|
+
{
|
|
1048
|
+
role: "user",
|
|
1049
|
+
content: `
|
|
1050
|
+
Respond with ONLY valid JSON matching this schema:
|
|
1051
|
+
${schemaStr}
|
|
1052
|
+
|
|
1053
|
+
No markdown, no code fences, just raw JSON.`
|
|
1054
|
+
}
|
|
1055
|
+
];
|
|
1056
|
+
let lastError = null;
|
|
1057
|
+
const maxRetries = request.maxParseRetries ?? 2;
|
|
1058
|
+
const llm = {
|
|
1059
|
+
complete: (req) => Effect6.gen(function* () {
|
|
1060
|
+
const model = req.model?.model ?? defaultModel;
|
|
1061
|
+
const res = yield* Effect6.tryPromise({
|
|
1062
|
+
try: async () => {
|
|
1063
|
+
const resp = await fetch(`${endpoint}/api/chat`, {
|
|
1064
|
+
method: "POST",
|
|
1065
|
+
headers: { "Content-Type": "application/json" },
|
|
1066
|
+
body: JSON.stringify({
|
|
1067
|
+
model,
|
|
1068
|
+
messages: toOllamaMessages(req.messages),
|
|
1069
|
+
stream: false,
|
|
1070
|
+
options: {
|
|
1071
|
+
temperature: req.temperature ?? config.defaultTemperature,
|
|
1072
|
+
num_predict: req.maxTokens ?? config.defaultMaxTokens
|
|
1073
|
+
}
|
|
1074
|
+
})
|
|
1075
|
+
});
|
|
1076
|
+
return await resp.json();
|
|
1077
|
+
},
|
|
1078
|
+
catch: (error) => new LLMError({
|
|
1079
|
+
message: `Ollama request failed: ${error}`,
|
|
1080
|
+
provider: "ollama",
|
|
1081
|
+
cause: error
|
|
1082
|
+
})
|
|
1083
|
+
});
|
|
1084
|
+
const content = res.message?.content ?? "";
|
|
1085
|
+
const inputTokens = res.prompt_eval_count ?? 0;
|
|
1086
|
+
const outputTokens = res.eval_count ?? 0;
|
|
1087
|
+
return {
|
|
1088
|
+
content,
|
|
1089
|
+
stopReason: "end_turn",
|
|
1090
|
+
usage: {
|
|
1091
|
+
inputTokens,
|
|
1092
|
+
outputTokens,
|
|
1093
|
+
totalTokens: inputTokens + outputTokens,
|
|
1094
|
+
estimatedCost: 0
|
|
1095
|
+
},
|
|
1096
|
+
model: res.model ?? model
|
|
1097
|
+
};
|
|
1098
|
+
})
|
|
1099
|
+
};
|
|
1100
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
1101
|
+
const msgs = attempt === 0 ? messagesWithFormat : [
|
|
1102
|
+
...messagesWithFormat,
|
|
1103
|
+
{
|
|
1104
|
+
role: "assistant",
|
|
1105
|
+
content: String(lastError)
|
|
1106
|
+
},
|
|
1107
|
+
{
|
|
1108
|
+
role: "user",
|
|
1109
|
+
content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
|
|
1110
|
+
}
|
|
1111
|
+
];
|
|
1112
|
+
const response = yield* llm.complete({
|
|
1113
|
+
...request,
|
|
1114
|
+
messages: msgs
|
|
1115
|
+
});
|
|
1116
|
+
try {
|
|
1117
|
+
const parsed = JSON.parse(response.content);
|
|
1118
|
+
const decoded = Schema4.decodeUnknownEither(
|
|
1119
|
+
request.outputSchema
|
|
1120
|
+
)(parsed);
|
|
1121
|
+
if (decoded._tag === "Right") {
|
|
1122
|
+
return decoded.right;
|
|
1123
|
+
}
|
|
1124
|
+
lastError = decoded.left;
|
|
1125
|
+
} catch (e) {
|
|
1126
|
+
lastError = e;
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
return yield* Effect6.fail(
|
|
1130
|
+
new LLMParseError({
|
|
1131
|
+
message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
|
|
1132
|
+
rawOutput: String(lastError),
|
|
1133
|
+
expectedSchema: schemaStr
|
|
1134
|
+
})
|
|
1135
|
+
);
|
|
1136
|
+
}),
|
|
1137
|
+
embed: (texts, model) => Effect6.tryPromise({
|
|
1138
|
+
try: async () => {
|
|
1139
|
+
const embeddingModel = model ?? config.embeddingConfig.model ?? "nomic-embed-text";
|
|
1140
|
+
return Promise.all(
|
|
1141
|
+
[...texts].map(async (text) => {
|
|
1142
|
+
const res = await fetch(`${endpoint}/api/embed`, {
|
|
1143
|
+
method: "POST",
|
|
1144
|
+
headers: { "Content-Type": "application/json" },
|
|
1145
|
+
body: JSON.stringify({
|
|
1146
|
+
model: embeddingModel,
|
|
1147
|
+
input: text
|
|
1148
|
+
})
|
|
1149
|
+
});
|
|
1150
|
+
const data = await res.json();
|
|
1151
|
+
return data.embeddings[0];
|
|
1152
|
+
})
|
|
1153
|
+
);
|
|
1154
|
+
},
|
|
1155
|
+
catch: (error) => new LLMError({
|
|
1156
|
+
message: `Embedding failed: ${error}`,
|
|
1157
|
+
provider: "ollama",
|
|
1158
|
+
cause: error
|
|
1159
|
+
})
|
|
1160
|
+
}),
|
|
1161
|
+
countTokens: (messages) => Effect6.gen(function* () {
|
|
1162
|
+
return yield* estimateTokenCount(messages);
|
|
1163
|
+
}),
|
|
1164
|
+
getModelConfig: () => Effect6.succeed({
|
|
1165
|
+
provider: "ollama",
|
|
1166
|
+
model: defaultModel
|
|
1167
|
+
})
|
|
1168
|
+
});
|
|
1169
|
+
})
|
|
1170
|
+
);
|
|
1171
|
+
|
|
1172
|
+
// src/providers/gemini.ts
|
|
1173
|
+
import { Effect as Effect7, Layer as Layer6, Stream as Stream4, Schema as Schema5 } from "effect";
|
|
1174
|
+
var toGeminiContents = (messages) => {
|
|
1175
|
+
const result = [];
|
|
1176
|
+
for (const msg of messages) {
|
|
1177
|
+
if (msg.role === "system") continue;
|
|
1178
|
+
const role = msg.role === "assistant" ? "model" : "user";
|
|
1179
|
+
if (typeof msg.content === "string") {
|
|
1180
|
+
result.push({ role, parts: [{ text: msg.content }] });
|
|
1181
|
+
} else {
|
|
1182
|
+
const parts = [];
|
|
1183
|
+
for (const block of msg.content) {
|
|
1184
|
+
if (block.type === "text") {
|
|
1185
|
+
parts.push({ text: block.text });
|
|
1186
|
+
} else if (block.type === "tool_use") {
|
|
1187
|
+
parts.push({
|
|
1188
|
+
functionCall: { name: block.name, args: block.input }
|
|
1189
|
+
});
|
|
1190
|
+
} else if (block.type === "tool_result") {
|
|
1191
|
+
parts.push({
|
|
1192
|
+
functionResponse: {
|
|
1193
|
+
name: "tool",
|
|
1194
|
+
response: { content: block.content }
|
|
1195
|
+
}
|
|
1196
|
+
});
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1199
|
+
if (parts.length > 0) {
|
|
1200
|
+
result.push({ role, parts });
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
return result;
|
|
1205
|
+
};
|
|
1206
|
+
var extractSystemPrompt = (messages) => {
|
|
1207
|
+
const sys = messages.find((m) => m.role === "system");
|
|
1208
|
+
if (!sys) return void 0;
|
|
1209
|
+
return typeof sys.content === "string" ? sys.content : void 0;
|
|
1210
|
+
};
|
|
1211
|
+
var toGeminiTools = (tools) => tools.length === 0 ? void 0 : [
|
|
1212
|
+
{
|
|
1213
|
+
functionDeclarations: tools.map((t) => ({
|
|
1214
|
+
name: t.name,
|
|
1215
|
+
description: t.description,
|
|
1216
|
+
parameters: { type: "object", ...t.inputSchema }
|
|
1217
|
+
}))
|
|
1218
|
+
}
|
|
1219
|
+
];
|
|
1220
|
+
var toEffectError3 = (error) => {
|
|
1221
|
+
const err = error;
|
|
1222
|
+
if (err.status === 429 || err.code === 429) {
|
|
1223
|
+
return new LLMRateLimitError({
|
|
1224
|
+
message: err.message ?? "Rate limit exceeded",
|
|
1225
|
+
provider: "gemini",
|
|
1226
|
+
retryAfterMs: 6e4
|
|
1227
|
+
});
|
|
1228
|
+
}
|
|
1229
|
+
return new LLMError({
|
|
1230
|
+
message: err.message ?? String(error),
|
|
1231
|
+
provider: "gemini",
|
|
1232
|
+
cause: error
|
|
1233
|
+
});
|
|
1234
|
+
};
|
|
1235
|
+
var mapGeminiResponse = (response, model) => {
|
|
1236
|
+
const toolCalls = response.functionCalls?.map((fc, i) => ({
|
|
1237
|
+
id: `call_${i}`,
|
|
1238
|
+
name: fc.name,
|
|
1239
|
+
input: fc.args
|
|
1240
|
+
}));
|
|
1241
|
+
const inputTokens = response.usageMetadata?.promptTokenCount ?? 0;
|
|
1242
|
+
const outputTokens = response.usageMetadata?.candidatesTokenCount ?? 0;
|
|
1243
|
+
return {
|
|
1244
|
+
content: response.text ?? "",
|
|
1245
|
+
stopReason: toolCalls?.length ? "tool_use" : "end_turn",
|
|
1246
|
+
usage: {
|
|
1247
|
+
inputTokens,
|
|
1248
|
+
outputTokens,
|
|
1249
|
+
totalTokens: inputTokens + outputTokens,
|
|
1250
|
+
estimatedCost: calculateCost(inputTokens, outputTokens, model)
|
|
1251
|
+
},
|
|
1252
|
+
model,
|
|
1253
|
+
toolCalls: toolCalls?.length ? toolCalls : void 0
|
|
1254
|
+
};
|
|
1255
|
+
};
|
|
1256
|
+
var GeminiProviderLive = Layer6.effect(
|
|
1257
|
+
LLMService,
|
|
1258
|
+
Effect7.gen(function* () {
|
|
1259
|
+
const config = yield* LLMConfig;
|
|
1260
|
+
let _clientPromise = null;
|
|
1261
|
+
const getClient = () => {
|
|
1262
|
+
if (!_clientPromise) {
|
|
1263
|
+
_clientPromise = import("@google/genai").then(({ GoogleGenAI }) => new GoogleGenAI({ apiKey: config.googleApiKey }));
|
|
1264
|
+
}
|
|
1265
|
+
return _clientPromise;
|
|
1266
|
+
};
|
|
1267
|
+
const buildGeminiConfig = (opts) => {
|
|
1268
|
+
const cfg = {
|
|
1269
|
+
maxOutputTokens: opts.maxTokens ?? config.defaultMaxTokens,
|
|
1270
|
+
temperature: opts.temperature ?? config.defaultTemperature
|
|
1271
|
+
};
|
|
1272
|
+
const sys = opts.systemPrompt;
|
|
1273
|
+
if (sys) cfg.systemInstruction = sys;
|
|
1274
|
+
if (opts.stopSequences?.length) cfg.stopSequences = [...opts.stopSequences];
|
|
1275
|
+
if (opts.tools?.length) {
|
|
1276
|
+
cfg.tools = toGeminiTools([...opts.tools]);
|
|
1277
|
+
}
|
|
1278
|
+
return cfg;
|
|
1279
|
+
};
|
|
1280
|
+
return LLMService.of({
|
|
1281
|
+
complete: (request) => Effect7.gen(function* () {
|
|
1282
|
+
const client = yield* Effect7.promise(() => getClient());
|
|
1283
|
+
const model = request.model?.model ?? config.defaultModel;
|
|
1284
|
+
const contents = toGeminiContents(request.messages);
|
|
1285
|
+
const systemPrompt = extractSystemPrompt(request.messages) ?? request.systemPrompt;
|
|
1286
|
+
const response = yield* Effect7.tryPromise({
|
|
1287
|
+
try: () => client.models.generateContent({
|
|
1288
|
+
model,
|
|
1289
|
+
contents,
|
|
1290
|
+
config: buildGeminiConfig({
|
|
1291
|
+
maxTokens: request.maxTokens,
|
|
1292
|
+
temperature: request.temperature,
|
|
1293
|
+
systemPrompt,
|
|
1294
|
+
stopSequences: request.stopSequences,
|
|
1295
|
+
tools: request.tools
|
|
1296
|
+
})
|
|
1297
|
+
}),
|
|
1298
|
+
catch: toEffectError3
|
|
1299
|
+
});
|
|
1300
|
+
return mapGeminiResponse(response, model);
|
|
1301
|
+
}).pipe(
|
|
1302
|
+
Effect7.retry(retryPolicy),
|
|
1303
|
+
Effect7.timeout("30 seconds"),
|
|
1304
|
+
Effect7.catchTag(
|
|
1305
|
+
"TimeoutException",
|
|
1306
|
+
() => Effect7.fail(
|
|
1307
|
+
new LLMTimeoutError({
|
|
1308
|
+
message: "LLM request timed out",
|
|
1309
|
+
provider: "gemini",
|
|
1310
|
+
timeoutMs: 3e4
|
|
1311
|
+
})
|
|
1312
|
+
)
|
|
1313
|
+
)
|
|
1314
|
+
),
|
|
1315
|
+
stream: (request) => Effect7.gen(function* () {
|
|
1316
|
+
const model = request.model?.model ?? config.defaultModel;
|
|
1317
|
+
const contents = toGeminiContents(request.messages);
|
|
1318
|
+
const systemPrompt = extractSystemPrompt(request.messages) ?? request.systemPrompt;
|
|
1319
|
+
return Stream4.async((emit) => {
|
|
1320
|
+
void (async () => {
|
|
1321
|
+
try {
|
|
1322
|
+
const client = await getClient();
|
|
1323
|
+
const stream = await client.models.generateContentStream({
|
|
1324
|
+
model,
|
|
1325
|
+
contents,
|
|
1326
|
+
config: buildGeminiConfig({
|
|
1327
|
+
maxTokens: request.maxTokens,
|
|
1328
|
+
temperature: request.temperature,
|
|
1329
|
+
systemPrompt
|
|
1330
|
+
})
|
|
1331
|
+
});
|
|
1332
|
+
let fullContent = "";
|
|
1333
|
+
let inputTokens = 0;
|
|
1334
|
+
let outputTokens = 0;
|
|
1335
|
+
for await (const chunk of stream) {
|
|
1336
|
+
if (chunk.text) {
|
|
1337
|
+
emit.single({ type: "text_delta", text: chunk.text });
|
|
1338
|
+
fullContent += chunk.text;
|
|
1339
|
+
}
|
|
1340
|
+
if (chunk.usageMetadata) {
|
|
1341
|
+
inputTokens = chunk.usageMetadata.promptTokenCount ?? 0;
|
|
1342
|
+
outputTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1345
|
+
emit.single({ type: "content_complete", content: fullContent });
|
|
1346
|
+
emit.single({
|
|
1347
|
+
type: "usage",
|
|
1348
|
+
usage: {
|
|
1349
|
+
inputTokens,
|
|
1350
|
+
outputTokens,
|
|
1351
|
+
totalTokens: inputTokens + outputTokens,
|
|
1352
|
+
estimatedCost: calculateCost(inputTokens, outputTokens, model)
|
|
1353
|
+
}
|
|
1354
|
+
});
|
|
1355
|
+
emit.end();
|
|
1356
|
+
} catch (error) {
|
|
1357
|
+
const err = error;
|
|
1358
|
+
emit.fail(
|
|
1359
|
+
new LLMError({
|
|
1360
|
+
message: err.message ?? String(error),
|
|
1361
|
+
provider: "gemini",
|
|
1362
|
+
cause: error
|
|
1363
|
+
})
|
|
1364
|
+
);
|
|
1365
|
+
}
|
|
1366
|
+
})();
|
|
1367
|
+
});
|
|
1368
|
+
}),
|
|
1369
|
+
completeStructured: (request) => Effect7.gen(function* () {
|
|
1370
|
+
const schemaStr = JSON.stringify(
|
|
1371
|
+
Schema5.encodedSchema(request.outputSchema),
|
|
1372
|
+
null,
|
|
1373
|
+
2
|
|
1374
|
+
);
|
|
1375
|
+
const messagesWithFormat = [
|
|
1376
|
+
...request.messages,
|
|
1377
|
+
{
|
|
1378
|
+
role: "user",
|
|
1379
|
+
content: `
|
|
1380
|
+
Respond with ONLY valid JSON matching this schema:
|
|
1381
|
+
${schemaStr}
|
|
1382
|
+
|
|
1383
|
+
No markdown, no code fences, just raw JSON.`
|
|
1384
|
+
}
|
|
1385
|
+
];
|
|
1386
|
+
let lastError = null;
|
|
1387
|
+
const maxRetries = request.maxParseRetries ?? 2;
|
|
1388
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
1389
|
+
const msgs = attempt === 0 ? messagesWithFormat : [
|
|
1390
|
+
...messagesWithFormat,
|
|
1391
|
+
{
|
|
1392
|
+
role: "assistant",
|
|
1393
|
+
content: String(lastError)
|
|
1394
|
+
},
|
|
1395
|
+
{
|
|
1396
|
+
role: "user",
|
|
1397
|
+
content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
|
|
1398
|
+
}
|
|
1399
|
+
];
|
|
1400
|
+
const client = yield* Effect7.promise(() => getClient());
|
|
1401
|
+
const model = request.model?.model ?? config.defaultModel;
|
|
1402
|
+
const response = yield* Effect7.tryPromise({
|
|
1403
|
+
try: () => client.models.generateContent({
|
|
1404
|
+
model,
|
|
1405
|
+
contents: toGeminiContents(msgs),
|
|
1406
|
+
config: buildGeminiConfig({
|
|
1407
|
+
maxTokens: request.maxTokens,
|
|
1408
|
+
temperature: request.temperature,
|
|
1409
|
+
systemPrompt: request.systemPrompt
|
|
1410
|
+
})
|
|
1411
|
+
}),
|
|
1412
|
+
catch: toEffectError3
|
|
1413
|
+
});
|
|
1414
|
+
const mapped = mapGeminiResponse(response, model);
|
|
1415
|
+
try {
|
|
1416
|
+
const parsed = JSON.parse(mapped.content);
|
|
1417
|
+
const decoded = Schema5.decodeUnknownEither(
|
|
1418
|
+
request.outputSchema
|
|
1419
|
+
)(parsed);
|
|
1420
|
+
if (decoded._tag === "Right") {
|
|
1421
|
+
return decoded.right;
|
|
1422
|
+
}
|
|
1423
|
+
lastError = decoded.left;
|
|
1424
|
+
} catch (e) {
|
|
1425
|
+
lastError = e;
|
|
1426
|
+
}
|
|
1427
|
+
}
|
|
1428
|
+
return yield* Effect7.fail(
|
|
1429
|
+
new LLMParseError({
|
|
1430
|
+
message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
|
|
1431
|
+
rawOutput: String(lastError),
|
|
1432
|
+
expectedSchema: schemaStr
|
|
1433
|
+
})
|
|
1434
|
+
);
|
|
1435
|
+
}),
|
|
1436
|
+
embed: (texts, model) => Effect7.tryPromise({
|
|
1437
|
+
try: async () => {
|
|
1438
|
+
const client = await getClient();
|
|
1439
|
+
const embeddingModel = model ?? "gemini-embedding-001";
|
|
1440
|
+
const result = await client.models.embedContent({
|
|
1441
|
+
model: embeddingModel,
|
|
1442
|
+
contents: [...texts],
|
|
1443
|
+
config: {
|
|
1444
|
+
outputDimensionality: config.embeddingConfig.dimensions
|
|
1445
|
+
}
|
|
1446
|
+
});
|
|
1447
|
+
return result.embeddings.map((e) => e.values);
|
|
1448
|
+
},
|
|
1449
|
+
catch: (error) => new LLMError({
|
|
1450
|
+
message: `Embedding failed: ${error}`,
|
|
1451
|
+
provider: "gemini",
|
|
1452
|
+
cause: error
|
|
1453
|
+
})
|
|
1454
|
+
}),
|
|
1455
|
+
countTokens: (messages) => Effect7.gen(function* () {
|
|
1456
|
+
return yield* estimateTokenCount(messages);
|
|
1457
|
+
}),
|
|
1458
|
+
getModelConfig: () => Effect7.succeed({
|
|
1459
|
+
provider: "gemini",
|
|
1460
|
+
model: config.defaultModel
|
|
1461
|
+
})
|
|
1462
|
+
});
|
|
1463
|
+
})
|
|
1464
|
+
);
|
|
1465
|
+
|
|
1466
|
+
// src/testing.ts
import { Effect as Effect8, Layer as Layer7, Stream as Stream5, Schema as Schema6 } from "effect";
var TestLLMService = (responses) => ({
  complete: (request) => Effect8.gen(function* () {
    const lastMessage = request.messages[request.messages.length - 1];
    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
    const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
    const searchText = `${content} ${systemPrompt}`;
    for (const [pattern, response] of Object.entries(responses)) {
      if (pattern.length > 0 && searchText.includes(pattern)) {
        return {
          content: response,
          stopReason: "end_turn",
          usage: {
            inputTokens: Math.ceil(content.length / 4),
            outputTokens: Math.ceil(response.length / 4),
            totalTokens: Math.ceil(content.length / 4) + Math.ceil(response.length / 4),
            estimatedCost: 0
          },
          model: "test-model"
        };
      }
    }
    return {
      content: "Test response",
      stopReason: "end_turn",
      usage: {
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        estimatedCost: 0
      },
      model: "test-model"
    };
  }),
  stream: (_request) => Effect8.succeed(
    Stream5.make(
      { type: "text_delta", text: "Test " },
      { type: "text_delta", text: "response" },
      {
        type: "content_complete",
        content: "Test response"
      },
      {
        type: "usage",
        usage: {
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
          estimatedCost: 0
        }
      }
    )
  ),
  completeStructured: (request) => Effect8.gen(function* () {
    const lastMessage = request.messages[request.messages.length - 1];
    const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
    let responseContent = "Test response";
    for (const [pattern, response] of Object.entries(responses)) {
      if (content.includes(pattern)) {
        responseContent = response;
        break;
      }
    }
    const parsed = JSON.parse(responseContent);
    return Schema6.decodeUnknownSync(request.outputSchema)(parsed);
  }),
  embed: (texts) => Effect8.succeed(
    texts.map(() => new Array(768).fill(0).map(() => Math.random()))
  ),
  countTokens: (messages) => Effect8.succeed(
    messages.reduce(
      (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
      0
    )
  ),
  getModelConfig: () => Effect8.succeed({
    provider: "anthropic",
    model: "test-model"
  })
});
var TestLLMServiceLayer = (responses = {}) => Layer7.succeed(LLMService, LLMService.of(TestLLMService(responses)));
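// Usage sketch (illustrative only): providing the in-memory test service above
// to a program under test. Pattern keys are substring-matched against the last
// message content and system prompt, as implemented in TestLLMService; the
// program shape below is an assumption for the example.
//
//   import { Effect } from "effect";
//   const testLayer = TestLLMServiceLayer({
//     weather: '{"city":"Paris","tempC":21}'  // returned when "weather" appears in the prompt
//   });
//   const result = await Effect.runPromise(
//     Effect.gen(function* () {
//       const llm = yield* LLMService;
//       return yield* llm.complete({ messages: [{ role: "user", content: "weather please" }] });
//     }).pipe(Effect.provide(testLayer))
//   );
//   // result.content === '{"city":"Paris","tempC":21}', result.model === "test-model"
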
// src/structured-output.ts
import { Schema as Schema7 } from "effect";
var ReActActionSchema = Schema7.Struct({
  thought: Schema7.String,
  action: Schema7.optional(
    Schema7.Struct({
      tool: Schema7.String,
      input: Schema7.Unknown
    })
  ),
  finalAnswer: Schema7.optional(Schema7.String),
  isComplete: Schema7.Boolean
});
var PlanSchema = Schema7.Struct({
  goal: Schema7.String,
  steps: Schema7.Array(
    Schema7.Struct({
      id: Schema7.Number,
      description: Schema7.String,
      tool: Schema7.optional(Schema7.String),
      dependsOn: Schema7.optional(Schema7.Array(Schema7.Number)),
      estimatedDuration: Schema7.optional(Schema7.String)
    })
  )
});
var ReflectionSchema = Schema7.Struct({
  taskAccomplished: Schema7.Boolean,
  confidence: Schema7.Number,
  strengths: Schema7.Array(Schema7.String),
  weaknesses: Schema7.Array(Schema7.String),
  needsRefinement: Schema7.Boolean,
  refinementSuggestions: Schema7.optional(Schema7.Array(Schema7.String))
});
var StrategySelectionSchema = Schema7.Struct({
  selectedStrategy: Schema7.String,
  reasoning: Schema7.String,
  confidence: Schema7.Number,
  alternativeStrategies: Schema7.Array(
    Schema7.Struct({
      strategy: Schema7.String,
      whyNot: Schema7.String
    })
  )
});
var ThoughtEvaluationSchema = Schema7.Struct({
  score: Schema7.Number,
  reasoning: Schema7.String,
  strengths: Schema7.Array(Schema7.String),
  weaknesses: Schema7.Array(Schema7.String),
  shouldExpand: Schema7.Boolean
});
var ComplexityAnalysisSchema = Schema7.Struct({
  score: Schema7.Number,
  factors: Schema7.Array(
    Schema7.Struct({
      factor: Schema7.String,
      weight: Schema7.Number,
      reasoning: Schema7.String
    })
  ),
  recommendedStrategy: Schema7.String,
  recommendedModel: Schema7.String
});
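// Usage sketch (illustrative only): decoding a model reply against one of the
// schemas above. The JSON sample is an assumption for the example; the decode
// call mirrors what completeStructured does with request.outputSchema.
//
//   import { Schema } from "effect";
//   const plan = Schema.decodeUnknownSync(PlanSchema)(JSON.parse(
//     '{"goal":"Summarize the report","steps":[{"id":1,"description":"Read the report"}]}'
//   ));
//   // plan.steps[0].description === "Read the report"
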
// src/runtime.ts
import { Layer as Layer8 } from "effect";
var createLLMProviderLayer = (provider = "anthropic", testResponses) => {
  if (provider === "test") {
    return Layer8.mergeAll(
      TestLLMServiceLayer(testResponses ?? {}),
      PromptManagerLive
    );
  }
  const configLayer = LLMConfigFromEnv;
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : LocalProviderLive;
  return Layer8.mergeAll(
    providerLayer.pipe(Layer8.provide(configLayer)),
    PromptManagerLive
  );
};
var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
  const configLayer = Layer8.succeed(LLMConfig, config);
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : LocalProviderLive;
  return Layer8.mergeAll(
    providerLayer.pipe(Layer8.provide(configLayer)),
    PromptManagerLive
  );
};
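// Usage sketch (illustrative only): assembling a provider layer for a program.
// The factory signatures come from the functions above (a provider name plus
// optional test responses, or an explicit LLMConfig value); the program body
// below is an assumption for the example.
//
//   import { Effect } from "effect";
//   const liveLayer = createLLMProviderLayer("gemini");              // config read via LLMConfigFromEnv
//   const testLayer = createLLMProviderLayer("test", { ping: "pong" });
//   const run = (layer) => Effect.runPromise(
//     Effect.gen(function* () {
//       const llm = yield* LLMService;
//       return yield* llm.getModelConfig();
//     }).pipe(Effect.provide(layer))
//   );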
export {
  AnthropicProviderLive,
  CacheControlSchema,
  CompletionResponseSchema,
  ComplexityAnalysisSchema,
  DefaultEmbeddingConfig,
  EmbeddingConfigSchema,
  GeminiProviderLive,
  ImageContentBlockSchema,
  ImageSourceSchema,
  LLMConfig,
  LLMConfigFromEnv,
  LLMContextOverflowError,
  LLMError,
  LLMParseError,
  LLMProviderType,
  LLMRateLimitError,
  LLMService,
  LLMTimeoutError,
  LocalProviderLive,
  ModelConfigSchema,
  ModelPresets,
  OpenAIProviderLive,
  PlanSchema,
  PromptManager,
  PromptManagerLive,
  ReActActionSchema,
  ReflectionSchema,
  StopReasonSchema,
  StrategySelectionSchema,
  TestLLMService,
  TestLLMServiceLayer,
  TextContentBlockSchema,
  ThoughtEvaluationSchema,
  TokenUsageSchema,
  ToolCallSchema,
  ToolDefinitionSchema,
  ToolResultContentBlockSchema,
  ToolUseContentBlockSchema,
  calculateCost,
  createLLMProviderLayer,
  createLLMProviderLayerWithConfig,
  estimateTokenCount,
  makeCacheable,
  retryPolicy
};
//# sourceMappingURL=index.js.map