@reactive-agents/llm-provider 0.5.0 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +814 -29
- package/dist/index.js +532 -74
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1123,16 +1123,27 @@ var init_dist = __esm({
|
|
|
1123
1123
|
// src/types.ts
|
|
1124
1124
|
import { Schema } from "effect";
|
|
1125
1125
|
var LLMProviderType = Schema.Literal(
|
|
1126
|
+
/** Claude models via Anthropic API. Requires ANTHROPIC_API_KEY. */
|
|
1126
1127
|
"anthropic",
|
|
1128
|
+
/** GPT models via OpenAI API. Requires OPENAI_API_KEY. */
|
|
1127
1129
|
"openai",
|
|
1130
|
+
/** Local models via Ollama. Requires a running Ollama server. */
|
|
1128
1131
|
"ollama",
|
|
1132
|
+
/** Google Gemini models. Requires GOOGLE_API_KEY. */
|
|
1129
1133
|
"gemini",
|
|
1134
|
+
/** LiteLLM proxy — unified gateway to 40+ model providers. */
|
|
1135
|
+
"litellm",
|
|
1136
|
+
/** User-defined provider adapter — implement the LLMService interface. */
|
|
1130
1137
|
"custom"
|
|
1131
1138
|
);
|
|
1132
1139
|
var EmbeddingConfigSchema = Schema.Struct({
|
|
1140
|
+
/** Embedding model name (e.g., "text-embedding-3-small") */
|
|
1133
1141
|
model: Schema.String,
|
|
1142
|
+
/** Output embedding vector dimensionality */
|
|
1134
1143
|
dimensions: Schema.Number,
|
|
1144
|
+
/** Provider hosting the embedding model */
|
|
1135
1145
|
provider: Schema.Literal("openai", "ollama"),
|
|
1146
|
+
/** Maximum vectors to embed in a single API call (default: 100) */
|
|
1136
1147
|
batchSize: Schema.optional(Schema.Number)
|
|
1137
1148
|
});
|
|
1138
1149
|
var DefaultEmbeddingConfig = {
|
|
@@ -1142,110 +1153,176 @@ var DefaultEmbeddingConfig = {
|
|
|
1142
1153
|
batchSize: 100
|
|
1143
1154
|
};
|
|
1144
1155
|
var ModelConfigSchema = Schema.Struct({
|
|
1156
|
+
/** LLM provider identifier */
|
|
1145
1157
|
provider: LLMProviderType,
|
|
1158
|
+
/** Model name/identifier for the provider */
|
|
1146
1159
|
model: Schema.String,
|
|
1160
|
+
/** Maximum tokens in response (optional) */
|
|
1147
1161
|
maxTokens: Schema.optional(Schema.Number),
|
|
1162
|
+
/** Sampling temperature 0.0-1.0 (optional) */
|
|
1148
1163
|
temperature: Schema.optional(Schema.Number),
|
|
1164
|
+
/** Top-p (nucleus) sampling probability (optional) */
|
|
1149
1165
|
topP: Schema.optional(Schema.Number),
|
|
1166
|
+
/** Stop sequences to halt generation (optional) */
|
|
1150
1167
|
stopSequences: Schema.optional(Schema.Array(Schema.String))
|
|
1151
1168
|
});
|
|
1152
1169
|
/**
 * Built-in model presets, keyed by a short alias.
 *
 * Every preset carries the same fields:
 * - `provider`        — provider identifier (one of the `LLMProviderType` literals)
 * - `model`           — provider-specific model identifier
 * - `costPer1MInput`  — cost per 1 million input tokens in USD
 * - `costPer1MOutput` — cost per 1 million output tokens in USD
 * - `maxContext`      — maximum context window in tokens
 * - `quality`         — relative quality tier in the range 0..1
 */
var ModelPresets = {
  /**
   * Claude 3.5 Haiku — fast, cost-effective Anthropic model.
   * Best for low-latency, simple reasoning tasks; not recommended for complex analysis.
   */
  "claude-haiku": {
    provider: "anthropic",
    model: "claude-3-5-haiku-20241022",
    /** Cost per 1 million input tokens in USD */
    // NOTE(review): Anthropic's published list price for this model appears to be
    // $0.80 / $4 per 1M tokens — confirm before relying on these for cost estimates.
    costPer1MInput: 1,
    /** Cost per 1 million output tokens in USD */
    costPer1MOutput: 5,
    /** Maximum context window in tokens */
    maxContext: 2e5,
    /** Quality tier (0.6 = reliable for simple tasks) */
    quality: 0.6
  },
  /**
   * Claude Sonnet 4 — balanced Anthropic model.
   * Recommended for general-purpose reasoning, tool use, and production agents.
   */
  "claude-sonnet": {
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
    costPer1MInput: 3,
    costPer1MOutput: 15,
    maxContext: 2e5,
    /** Quality tier (0.85 = excellent reasoning) */
    quality: 0.85
  },
  /**
   * Claude Sonnet 4.5 — latest Anthropic model.
   * Superior reasoning over Sonnet 4; recommended for complex multi-step reasoning.
   */
  "claude-sonnet-4-5": {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    costPer1MInput: 3,
    costPer1MOutput: 15,
    maxContext: 2e5,
    /** Quality tier (0.9 = very strong reasoning) */
    quality: 0.9
  },
  /**
   * Claude Opus 4 — most capable Anthropic model.
   * Best for complex analysis, research, and high-accuracy multi-hop reasoning.
   * Highest cost; 200K-token context window (same as the other Claude presets).
   */
  "claude-opus": {
    provider: "anthropic",
    model: "claude-opus-4-20250514",
    costPer1MInput: 15,
    costPer1MOutput: 75,
    // Claude Opus 4 accepts 200K tokens of context, not 1M; an inflated value here
    // lets context budgeting build prompts the API will reject.
    maxContext: 2e5,
    /** Quality tier (1.0 = frontier-class reasoning) */
    quality: 1
  },
  /**
   * GPT-4o Mini — fast, low-cost OpenAI model.
   * Good for simple tasks and high-throughput scenarios.
   */
  "gpt-4o-mini": {
    provider: "openai",
    model: "gpt-4o-mini",
    costPer1MInput: 0.15,
    costPer1MOutput: 0.6,
    maxContext: 128e3,
    /** Quality tier (0.55 = capable but less reliable for complex reasoning) */
    quality: 0.55
  },
  /**
   * GPT-4o — latest OpenAI flagship model.
   * Strong reasoning, multimodal support; recommended for tool use and complex analysis.
   */
  "gpt-4o": {
    provider: "openai",
    model: "gpt-4o",
    costPer1MInput: 2.5,
    costPer1MOutput: 10,
    maxContext: 128e3,
    /** Quality tier (0.8 = very good reasoning) */
    quality: 0.8
  },
  /**
   * Gemini 2.0 Flash — fast Google model.
   * Excellent speed and cost efficiency; large 1M context window.
   */
  "gemini-2.0-flash": {
    provider: "gemini",
    model: "gemini-2.0-flash",
    costPer1MInput: 0.1,
    costPer1MOutput: 0.4,
    maxContext: 1e6,
    /** Quality tier (0.75 = good reasoning) */
    quality: 0.75
  },
  /**
   * Gemini 2.5 Pro Preview — advanced Google model.
   * Superior reasoning to Flash; large context window and competitive pricing.
   */
  "gemini-2.5-pro": {
    provider: "gemini",
    // NOTE(review): this is a dated preview identifier — confirm it is still served.
    model: "gemini-2.5-pro-preview-03-25",
    costPer1MInput: 1.25,
    costPer1MOutput: 10,
    maxContext: 1e6,
    /** Quality tier (0.95 = excellent reasoning) */
    quality: 0.95
  }
};
|
|
1218
1279
|
var CacheControlSchema = Schema.Struct({
|
|
1280
|
+
/** Cache type: "ephemeral" for request-scoped caching */
|
|
1219
1281
|
type: Schema.Literal("ephemeral")
|
|
1220
1282
|
});
|
|
1221
1283
|
var ImageSourceSchema = Schema.Struct({
|
|
1284
|
+
/** Image source type: "base64" for encoded data or "url" for HTTP(S) URL */
|
|
1222
1285
|
type: Schema.Literal("base64", "url"),
|
|
1286
|
+
/** MIME type of image: PNG, JPEG, GIF, or WebP */
|
|
1223
1287
|
media_type: Schema.Literal(
|
|
1224
1288
|
"image/png",
|
|
1225
1289
|
"image/jpeg",
|
|
1226
1290
|
"image/gif",
|
|
1227
1291
|
"image/webp"
|
|
1228
1292
|
),
|
|
1293
|
+
/** Either base64-encoded data or HTTPS URL */
|
|
1229
1294
|
data: Schema.String
|
|
1230
1295
|
});
|
|
1231
1296
|
var TextContentBlockSchema = Schema.Struct({
|
|
1297
|
+
/** Content type identifier */
|
|
1232
1298
|
type: Schema.Literal("text"),
|
|
1299
|
+
/** Text content */
|
|
1233
1300
|
text: Schema.String,
|
|
1301
|
+
/** Optional Anthropic cache control directive */
|
|
1234
1302
|
cache_control: Schema.optional(CacheControlSchema)
|
|
1235
1303
|
});
|
|
1236
1304
|
var ImageContentBlockSchema = Schema.Struct({
|
|
1305
|
+
/** Content type identifier */
|
|
1237
1306
|
type: Schema.Literal("image"),
|
|
1307
|
+
/** Image source reference */
|
|
1238
1308
|
source: ImageSourceSchema
|
|
1239
1309
|
});
|
|
1240
1310
|
var ToolUseContentBlockSchema = Schema.Struct({
|
|
1311
|
+
/** Content type identifier */
|
|
1241
1312
|
type: Schema.Literal("tool_use"),
|
|
1313
|
+
/** Unique tool call identifier */
|
|
1242
1314
|
id: Schema.String,
|
|
1315
|
+
/** Tool name being invoked */
|
|
1243
1316
|
name: Schema.String,
|
|
1317
|
+
/** Tool parameters (JSON-compatible object) */
|
|
1244
1318
|
input: Schema.Unknown
|
|
1245
1319
|
});
|
|
1246
1320
|
var ToolResultContentBlockSchema = Schema.Struct({
|
|
1321
|
+
/** Content type identifier */
|
|
1247
1322
|
type: Schema.Literal("tool_result"),
|
|
1323
|
+
/** ID of tool call this result corresponds to */
|
|
1248
1324
|
tool_use_id: Schema.String,
|
|
1325
|
+
/** Tool result/output content */
|
|
1249
1326
|
content: Schema.String
|
|
1250
1327
|
});
|
|
1251
1328
|
var makeCacheable = (text) => ({
|
|
@@ -1254,32 +1331,51 @@ var makeCacheable = (text) => ({
|
|
|
1254
1331
|
cache_control: { type: "ephemeral" }
|
|
1255
1332
|
});
|
|
1256
1333
|
var TokenUsageSchema = Schema.Struct({
|
|
1334
|
+
/** Tokens consumed by the input (messages + system prompt) */
|
|
1257
1335
|
inputTokens: Schema.Number,
|
|
1336
|
+
/** Tokens generated in the response */
|
|
1258
1337
|
outputTokens: Schema.Number,
|
|
1338
|
+
/** Sum of input and output tokens */
|
|
1259
1339
|
totalTokens: Schema.Number,
|
|
1340
|
+
/** Estimated cost in USD based on provider pricing */
|
|
1260
1341
|
estimatedCost: Schema.Number
|
|
1261
1342
|
});
|
|
1262
1343
|
var StopReasonSchema = Schema.Literal(
|
|
1344
|
+
/** Model concluded naturally — full response present. */
|
|
1263
1345
|
"end_turn",
|
|
1346
|
+
/** Hit `maxTokens` limit — response may be truncated. */
|
|
1264
1347
|
"max_tokens",
|
|
1348
|
+
/** Hit a configured stop sequence — generation halted by design. */
|
|
1265
1349
|
"stop_sequence",
|
|
1350
|
+
/** Model is invoking a tool — `toolCalls` array is populated. */
|
|
1266
1351
|
"tool_use"
|
|
1267
1352
|
);
|
|
1268
1353
|
var ToolDefinitionSchema = Schema.Struct({
|
|
1354
|
+
/** Tool identifier (used by model to invoke the tool) */
|
|
1269
1355
|
name: Schema.String,
|
|
1356
|
+
/** Human-readable tool description for the model */
|
|
1270
1357
|
description: Schema.String,
|
|
1358
|
+
/** Input schema describing expected parameters (JSON Schema format) */
|
|
1271
1359
|
inputSchema: Schema.Record({ key: Schema.String, value: Schema.Unknown })
|
|
1272
1360
|
});
|
|
1273
1361
|
var ToolCallSchema = Schema.Struct({
|
|
1362
|
+
/** Unique tool call identifier (generated by model) */
|
|
1274
1363
|
id: Schema.String,
|
|
1364
|
+
/** Tool name to invoke */
|
|
1275
1365
|
name: Schema.String,
|
|
1366
|
+
/** Tool input parameters (arbitrary JSON-compatible object) */
|
|
1276
1367
|
input: Schema.Unknown
|
|
1277
1368
|
});
|
|
1278
1369
|
var CompletionResponseSchema = Schema.Struct({
|
|
1370
|
+
/** Generated response content (text only, no content blocks) */
|
|
1279
1371
|
content: Schema.String,
|
|
1372
|
+
/** Why the model stopped generating */
|
|
1280
1373
|
stopReason: StopReasonSchema,
|
|
1374
|
+
/** Token usage statistics */
|
|
1281
1375
|
usage: TokenUsageSchema,
|
|
1376
|
+
/** Actual model identifier used (may differ from request) */
|
|
1282
1377
|
model: Schema.String,
|
|
1378
|
+
/** Tool calls emitted by the model (if any) */
|
|
1283
1379
|
toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
|
|
1284
1380
|
});
|
|
1285
1381
|
|
|
@@ -2068,12 +2164,34 @@ var mapOpenAIResponse = (response, model) => {
|
|
|
2068
2164
|
|
|
2069
2165
|
// src/providers/local.ts
|
|
2070
2166
|
import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
|
|
2071
|
-
var toOllamaMessages = (messages) => messages.
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2167
|
+
/**
 * Converts provider-agnostic chat messages into the message objects sent to Ollama.
 *
 * - `tool` messages are forwarded as `{ role: "tool", content }`.
 * - Every other message has its content flattened to a single string: string
 *   content passes through, block arrays contribute the concatenation of their
 *   `text` blocks, and missing content becomes `""` instead of throwing.
 * - `assistant` messages additionally expose their `tool_use` blocks as
 *   `tool_calls`, with `arguments` passed as an object (`{}` when the block has no input).
 *
 * @param {ReadonlyArray<object>} messages Messages with `role` and `content`.
 * @returns {Array<object>} A new array; the input is not mutated.
 */
var toOllamaMessages = (messages) => {
  // Single place for the "string or text-block array" flattening rule.
  const textOf = (content) => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content.filter((b) => b.type === "text").map((b) => b.text).join("");
  };
  return messages.map((m) => {
    if (m.role === "tool") {
      return { role: "tool", content: m.content };
    }
    const base = { role: m.role, content: textOf(m.content) };
    if (m.role !== "assistant" || !Array.isArray(m.content)) {
      return base;
    }
    const toolUseBlocks = m.content.filter((b) => b.type === "tool_use");
    if (toolUseBlocks.length === 0) {
      return base;
    }
    return {
      ...base,
      tool_calls: toolUseBlocks.map((tc) => ({
        function: {
          name: tc.name,
          arguments: tc.input ?? {}
        }
      }))
    };
  });
};
|
|
2077
2195
|
var toOllamaTools = (tools) => {
|
|
2078
2196
|
if (!tools || tools.length === 0) return void 0;
|
|
2079
2197
|
return tools.map((t) => ({
|
|
@@ -2657,10 +2775,349 @@ No markdown, no code fences, just raw JSON.`
|
|
|
2657
2775
|
})
|
|
2658
2776
|
);
|
|
2659
2777
|
|
|
2660
|
-
// src/
|
|
2778
|
+
// src/providers/litellm.ts
|
|
2661
2779
|
import { Effect as Effect8, Layer as Layer7, Stream as Stream5, Schema as Schema6 } from "effect";
|
|
2780
|
+
/**
 * Converts provider-agnostic chat messages into OpenAI-compatible chat messages
 * for a LiteLLM proxy.
 *
 * - `tool` messages become `{ role: "tool", tool_call_id, content }`.
 * - Every other message has its content flattened to one string (string content
 *   passes through; block arrays contribute the concatenation of their `text` blocks).
 * - `assistant` messages keep their `tool_use` blocks as `tool_calls`. Without
 *   this, a following `tool` message points at a `tool_call_id` that no assistant
 *   turn declared, which OpenAI-compatible backends reject.
 *
 * @param {ReadonlyArray<object>} messages Messages with `role` and `content`.
 * @returns {Array<object>} A new, mutable array (callers prepend a system message to it).
 */
var toLiteLLMMessages = (messages) => {
  const textOf = (content) => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content.filter((b) => b.type === "text").map((b) => b.text).join("");
  };
  return messages.map((m) => {
    if (m.role === "tool") {
      return {
        role: "tool",
        tool_call_id: m.toolCallId,
        content: m.content
      };
    }
    const base = { role: m.role, content: textOf(m.content) };
    if (m.role !== "assistant" || !Array.isArray(m.content)) {
      return base;
    }
    const toolUseBlocks = m.content.filter((b) => b.type === "tool_use");
    if (toolUseBlocks.length === 0) {
      return base;
    }
    return {
      ...base,
      tool_calls: toolUseBlocks.map((tc) => ({
        id: tc.id,
        type: "function",
        function: {
          name: tc.name,
          // The OpenAI wire format carries arguments as a JSON-encoded string.
          arguments: JSON.stringify(tc.input ?? {})
        }
      }))
    };
  });
};
|
|
2795
|
+
/**
 * Maps anything thrown by a LiteLLM request into one of the package's tagged errors.
 *
 * - A value whose `status` is 429 becomes an `LLMRateLimitError` with a fixed
 *   60-second `retryAfterMs` hint.
 * - Everything else becomes an `LLMError` that keeps the original value as `cause`.
 *
 * The thrown value is read with optional chaining because a rejection can carry
 * `null`, `undefined` or a primitive; dereferencing it directly would throw from
 * inside the error mapper and hide the real failure.
 *
 * @param {unknown} error The caught value.
 * @returns {LLMRateLimitError | LLMError}
 */
var toEffectError4 = (error) => {
  const status = error?.status;
  const message = error?.message;
  if (status === 429) {
    return new LLMRateLimitError({
      message: message ?? "Rate limit exceeded",
      provider: "litellm",
      retryAfterMs: 6e4
    });
  }
  return new LLMError({
    message: message ?? String(error),
    provider: "litellm",
    cause: error
  });
};
|
|
2810
|
+
/**
 * Wraps a tool definition in the OpenAI-style function-tool envelope used on the
 * LiteLLM chat-completions request: the tool's `inputSchema` is sent as `parameters`.
 *
 * @param {{ name: string, description: string, inputSchema: object }} tool
 * @returns {{ type: "function", function: { name: string, description: string, parameters: object } }}
 */
var toLiteLLMTool = ({ name, description, inputSchema }) => {
  const fn = { name, description, parameters: inputSchema };
  return { type: "function", function: fn };
};
|
|
2818
|
+
/**
 * Maps an OpenAI-compatible chat-completions payload from LiteLLM onto the
 * package's completion-response shape.
 *
 * Stop reason: `"tool_use"` when `finish_reason` is `"tool_calls"` or any tool
 * call is present, `"max_tokens"` when it is `"length"`, otherwise `"end_turn"`.
 *
 * Tool-call arguments: JSON strings are parsed (unparseable text is kept as
 * `{ raw }`); arguments that already arrive as an object are used as-is rather
 * than being pushed through `JSON.parse`, which would reduce them to `{ raw }`.
 *
 * Usage: missing counters default to 0, and `totalTokens` falls back to
 * input + output when the payload omits `total_tokens`.
 *
 * @param {object} response Parsed JSON body; may lack `choices`, `usage` or `model`.
 * @param {string} model Requested model, used for cost estimation and as a fallback identifier.
 * @returns {{ content: string, stopReason: string, usage: object, model: string, toolCalls: Array<object> | undefined }}
 */
var mapLiteLLMResponse = (response, model) => {
  // A body without `choices` (e.g. an error envelope) yields an empty completion
  // instead of a TypeError.
  const choice = response?.choices?.[0];
  const message = choice?.message;
  const content = message?.content ?? "";
  const rawToolCalls = Array.isArray(message?.tool_calls) ? message.tool_calls : [];
  const hasToolCalls = rawToolCalls.length > 0;
  const finishReason = choice?.finish_reason;
  let stopReason = "end_turn";
  if (finishReason === "tool_calls" || hasToolCalls) {
    stopReason = "tool_use";
  } else if (finishReason === "length") {
    stopReason = "max_tokens";
  }
  const parseArguments = (rawArgs) => {
    if (rawArgs !== null && typeof rawArgs === "object") {
      return rawArgs;
    }
    try {
      return JSON.parse(rawArgs);
    } catch {
      return { raw: rawArgs };
    }
  };
  const toolCalls = hasToolCalls ? rawToolCalls.map((tc) => ({
    id: tc.id,
    name: tc.function.name,
    input: parseArguments(tc.function.arguments)
  })) : void 0;
  const inputTokens = response?.usage?.prompt_tokens ?? 0;
  const outputTokens = response?.usage?.completion_tokens ?? 0;
  return {
    content,
    stopReason,
    usage: {
      inputTokens,
      outputTokens,
      totalTokens: response?.usage?.total_tokens ?? inputTokens + outputTokens,
      estimatedCost: calculateCost(inputTokens, outputTokens, model)
    },
    model: response?.model ?? model,
    toolCalls
  };
};
|
|
2850
|
+
/**
 * POSTs a JSON body to a LiteLLM proxy endpoint and returns the parsed JSON response.
 *
 * The base URL and path are joined with exactly one `/`, so a configured base URL
 * with a trailing slash or a path without a leading slash still yields a valid URL.
 *
 * @param {string} baseURL Proxy base URL, e.g. `"http://localhost:4000"`.
 * @param {string} path Endpoint path, e.g. `"/chat/completions"`.
 * @param {unknown} body Request payload; serialized with `JSON.stringify`.
 * @param {string} [apiKey] When set, sent as an `Authorization: Bearer` header.
 * @returns {Promise<unknown>} The parsed JSON response body.
 * @throws {Error} On a non-2xx response: message `LiteLLM <status>: <body or status text>`,
 *   with the HTTP status attached as `status`.
 */
var liteLLMFetch = async (baseURL, path, body, apiKey) => {
  const headers = {
    "Content-Type": "application/json"
  };
  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
  const url = `${String(baseURL).replace(/\/+$/, "")}/${String(path).replace(/^\/+/, "")}`;
  const res = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(body)
  });
  if (!res.ok) {
    // The error body is best-effort detail only; fall back to the status text if unreadable.
    const text = await res.text().catch(() => "");
    throw Object.assign(
      new Error(`LiteLLM ${res.status}: ${text || res.statusText}`),
      { status: res.status }
    );
  }
  return res.json();
};
|
|
2869
|
+
/**
 * Live `LLMService` layer that talks to a LiteLLM proxy over its OpenAI-compatible
 * HTTP API (`/chat/completions`, `/embeddings`).
 *
 * Configuration comes from `LLMConfig`, falling back to environment variables:
 * - base URL: `config.litellmBaseUrl` → `LITELLM_BASE_URL` → `http://localhost:4000`
 * - API key:  `config.litellmApiKey`  → `LITELLM_API_KEY`  → none
 *
 * Service methods:
 * - `complete`           — one non-streaming completion, retried per `retryPolicy`
 *                          and bounded by a 30-second timeout.
 * - `stream`             — server-sent-events streaming; emits `text_delta`,
 *                          `usage` and a final `content_complete` event.
 * - `completeStructured` — asks for JSON matching `request.outputSchema` and
 *                          re-prompts up to `maxParseRetries` (default 2) times.
 * - `embed`              — batched embeddings via `/embeddings`.
 * - `countTokens`        — delegates to `estimateTokenCount`.
 * - `getModelConfig`     — reports `{ provider: "litellm", model: defaultModel }`.
 */
var LiteLLMProviderLive = Layer7.effect(
  LLMService,
  Effect8.gen(function* () {
    const config = yield* LLMConfig;
    // Trailing slashes are stripped so `${baseURL}/chat/completions` never contains `//`.
    const baseURL = String(
      config.litellmBaseUrl ?? process.env.LITELLM_BASE_URL ?? "http://localhost:4000"
    ).replace(/\/+$/, "");
    const apiKey = config.litellmApiKey ?? process.env.LITELLM_API_KEY ?? void 0;
    const defaultModel = config.defaultModel;

    // `request.model` may be a plain identifier or a model-config object.
    const resolveModel = (request) => typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;

    // Wire-format messages with the optional system prompt placed first.
    const buildMessages = (request) => {
      const messages = toLiteLLMMessages(request.messages);
      if (request.systemPrompt) {
        messages.unshift({ role: "system", content: request.systemPrompt });
      }
      return messages;
    };

    // Sampling fields shared by every chat-completions request.
    const samplingFields = (request) => ({
      max_tokens: request.maxTokens ?? config.defaultMaxTokens,
      temperature: request.temperature ?? config.defaultTemperature
    });

    return LLMService.of({
      complete: (request) => Effect8.gen(function* () {
        const model = resolveModel(request);
        const requestBody = {
          model,
          ...samplingFields(request),
          messages: buildMessages(request),
          stop: request.stopSequences ? [...request.stopSequences] : void 0
        };
        if (request.tools && request.tools.length > 0) {
          requestBody.tools = request.tools.map(toLiteLLMTool);
        }
        const response = yield* Effect8.tryPromise({
          try: () => liteLLMFetch(baseURL, "/chat/completions", requestBody, apiKey),
          catch: (error) => toEffectError4(error)
        });
        return mapLiteLLMResponse(response, model);
      }).pipe(
        Effect8.retry(retryPolicy),
        // The timeout wraps the retried effect, so 30 s is the budget for all attempts together.
        Effect8.timeout("30 seconds"),
        Effect8.catchTag(
          "TimeoutException",
          () => Effect8.fail(
            new LLMTimeoutError({
              message: "LLM request timed out",
              provider: "litellm",
              timeoutMs: 3e4
            })
          )
        )
      ),
      stream: (request) => Effect8.gen(function* () {
        const model = resolveModel(request);
        return Stream5.async((emit) => {
          const doStream = async () => {
            try {
              const headers = {
                "Content-Type": "application/json"
              };
              if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
              const res = await fetch(`${baseURL}/chat/completions`, {
                method: "POST",
                headers,
                body: JSON.stringify({
                  model,
                  ...samplingFields(request),
                  messages: buildMessages(request),
                  // Stop sequences are honoured when streaming too, as in `complete`.
                  stop: request.stopSequences ? [...request.stopSequences] : void 0,
                  stream: true
                })
              });
              if (!res.ok || !res.body) {
                // Keep the HTTP status on the error so a 429 maps to a rate-limit error.
                throw Object.assign(
                  new Error(`LiteLLM stream error: ${res.status}`),
                  { status: res.status }
                );
              }
              const reader = res.body.getReader();
              const decoder = new TextDecoder();
              let buffer = "";
              let fullContent = "";
              let finished = false;
              const finish = () => {
                finished = true;
                emit.single({
                  type: "content_complete",
                  content: fullContent
                });
                emit.end();
              };
              // Handles one SSE line; anything that is not a `data:` line is ignored.
              const handleLine = (line) => {
                const trimmed = line.trim();
                if (!trimmed.startsWith("data:")) return;
                const data = trimmed.slice(5).trim();
                if (data === "[DONE]") {
                  finish();
                  return;
                }
                let chunk;
                try {
                  chunk = JSON.parse(data);
                } catch {
                  // Deliberate best-effort: a malformed chunk is skipped rather than failing the stream.
                  return;
                }
                const choice = chunk?.choices?.[0];
                const delta = choice?.delta?.content;
                if (delta) {
                  fullContent += delta;
                  emit.single({ type: "text_delta", text: delta });
                }
                if (choice?.finish_reason) {
                  const inputTokens = chunk.usage?.prompt_tokens ?? 0;
                  const outputTokens = chunk.usage?.completion_tokens ?? 0;
                  emit.single({
                    type: "usage",
                    usage: {
                      inputTokens,
                      outputTokens,
                      totalTokens: inputTokens + outputTokens,
                      estimatedCost: calculateCost(
                        inputTokens,
                        outputTokens,
                        model
                      )
                    }
                  });
                }
              };
              while (!finished) {
                const { done, value } = await reader.read();
                if (done) break;
                buffer += decoder.decode(value, { stream: true });
                const lines = buffer.split("\n");
                // The last piece may be a partial line; keep it for the next read.
                buffer = lines.pop() ?? "";
                for (const line of lines) {
                  handleLine(line);
                  if (finished) break;
                }
              }
              if (!finished) {
                // The body closed without a `[DONE]` sentinel: flush what is buffered
                // and still terminate the stream so consumers do not wait forever.
                buffer += decoder.decode();
                if (buffer) handleLine(buffer);
                if (!finished) finish();
              }
            } catch (error) {
              emit.fail(toEffectError4(error));
            }
          };
          void doStream();
        });
      }),
      // NOTE(review): `request.systemPrompt` is not forwarded on this path, unlike
      // `complete` and `stream` — confirm whether that is intended.
      completeStructured: (request) => Effect8.gen(function* () {
        const schemaStr = JSON.stringify(
          Schema6.encodedSchema(request.outputSchema),
          null,
          2
        );
        const messagesWithFormat = [
          ...request.messages,
          {
            role: "user",
            content: `
Respond with ONLY valid JSON matching this schema:
${schemaStr}

No markdown, no code fences, just raw JSON.`
          }
        ];
        const model = resolveModel(request);
        const maxRetries = request.maxParseRetries ?? 2;
        let lastError = null;
        // What the model actually returned on the previous attempt; replayed as the
        // assistant turn on retries and reported as `rawOutput` on final failure.
        let lastRawOutput = "";
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
          const msgs = attempt === 0 ? messagesWithFormat : [
            ...messagesWithFormat,
            {
              role: "assistant",
              content: lastRawOutput
            },
            {
              role: "user",
              content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
            }
          ];
          const completeResult = yield* Effect8.tryPromise({
            try: () => liteLLMFetch(
              baseURL,
              "/chat/completions",
              {
                model,
                ...samplingFields(request),
                messages: toLiteLLMMessages(msgs)
              },
              apiKey
            ),
            catch: (error) => toEffectError4(error)
          });
          const response = mapLiteLLMResponse(
            completeResult,
            model
          );
          lastRawOutput = response.content;
          try {
            const parsed = JSON.parse(response.content);
            const decoded = Schema6.decodeUnknownEither(
              request.outputSchema
            )(parsed);
            if (decoded._tag === "Right") {
              return decoded.right;
            }
            lastError = decoded.left;
          } catch (e) {
            lastError = e;
          }
        }
        return yield* Effect8.fail(
          new LLMParseError({
            message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
            rawOutput: lastRawOutput,
            expectedSchema: schemaStr
          })
        );
      }),
      embed: (texts, model) => Effect8.tryPromise({
        try: async () => {
          const embeddingModel = model ?? config.embeddingConfig.model;
          const batchSize = config.embeddingConfig.batchSize ?? 100;
          const results = [];
          // Batches are sent one after another so results keep the order of `texts`.
          for (let i = 0; i < texts.length; i += batchSize) {
            const batch = texts.slice(i, i + batchSize);
            const response = await liteLLMFetch(
              baseURL,
              "/embeddings",
              {
                model: embeddingModel,
                input: [...batch],
                dimensions: config.embeddingConfig.dimensions
              },
              apiKey
            );
            results.push(
              ...response.data.map((d) => d.embedding)
            );
          }
          return results;
        },
        catch: (error) => new LLMError({
          message: `Embedding failed: ${error}`,
          provider: "litellm",
          cause: error
        })
      }),
      countTokens: (messages) => Effect8.gen(function* () {
        return yield* estimateTokenCount(messages);
      }),
      getModelConfig: () => Effect8.succeed({
        provider: "litellm",
        model: defaultModel
      })
    });
  })
);
|
|
3116
|
+
|
|
3117
|
+
// src/testing.ts
|
|
3118
|
+
import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
|
|
2662
3119
|
var TestLLMService = (responses) => ({
|
|
2663
|
-
complete: (request) =>
|
|
3120
|
+
complete: (request) => Effect9.gen(function* () {
|
|
2664
3121
|
const lastMessage = request.messages[request.messages.length - 1];
|
|
2665
3122
|
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
2666
3123
|
const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
|
|
@@ -2692,8 +3149,8 @@ var TestLLMService = (responses) => ({
|
|
|
2692
3149
|
model: "test-model"
|
|
2693
3150
|
};
|
|
2694
3151
|
}),
|
|
2695
|
-
stream: (_request) =>
|
|
2696
|
-
|
|
3152
|
+
stream: (_request) => Effect9.succeed(
|
|
3153
|
+
Stream6.make(
|
|
2697
3154
|
{ type: "text_delta", text: "Test " },
|
|
2698
3155
|
{ type: "text_delta", text: "response" },
|
|
2699
3156
|
{
|
|
@@ -2711,7 +3168,7 @@ var TestLLMService = (responses) => ({
|
|
|
2711
3168
|
}
|
|
2712
3169
|
)
|
|
2713
3170
|
),
|
|
2714
|
-
completeStructured: (request) =>
|
|
3171
|
+
completeStructured: (request) => Effect9.gen(function* () {
|
|
2715
3172
|
const lastMessage = request.messages[request.messages.length - 1];
|
|
2716
3173
|
const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
|
|
2717
3174
|
let responseContent = "Test response";
|
|
@@ -2722,109 +3179,109 @@ var TestLLMService = (responses) => ({
|
|
|
2722
3179
|
}
|
|
2723
3180
|
}
|
|
2724
3181
|
const parsed = JSON.parse(responseContent);
|
|
2725
|
-
return
|
|
3182
|
+
return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
|
|
2726
3183
|
}),
|
|
2727
|
-
embed: (texts) =>
|
|
3184
|
+
embed: (texts) => Effect9.succeed(
|
|
2728
3185
|
texts.map(() => new Array(768).fill(0).map(() => Math.random()))
|
|
2729
3186
|
),
|
|
2730
|
-
countTokens: (messages) =>
|
|
3187
|
+
countTokens: (messages) => Effect9.succeed(
|
|
2731
3188
|
messages.reduce(
|
|
2732
3189
|
(sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
|
|
2733
3190
|
0
|
|
2734
3191
|
)
|
|
2735
3192
|
),
|
|
2736
|
-
getModelConfig: () =>
|
|
3193
|
+
getModelConfig: () => Effect9.succeed({
|
|
2737
3194
|
provider: "anthropic",
|
|
2738
3195
|
model: "test-model"
|
|
2739
3196
|
})
|
|
2740
3197
|
});
|
|
2741
|
-
var TestLLMServiceLayer = (responses = {}) =>
|
|
3198
|
+
/**
 * Builds a layer that provides `LLMService` from the in-memory `TestLLMService`.
 *
 * @param {object} [responses={}] Passed straight through to `TestLLMService`.
 * @returns A layer built with `Layer.succeed`, i.e. from an already-constructed service value.
 */
var TestLLMServiceLayer = (responses = {}) => {
  const service = LLMService.of(TestLLMService(responses));
  return Layer8.succeed(LLMService, service);
};
|
|
2742
3199
|
|
|
2743
3200
|
// src/structured-output.ts
|
|
2744
|
-
import { Schema as
|
|
2745
|
-
var ReActActionSchema =
|
|
2746
|
-
thought:
|
|
2747
|
-
action:
|
|
2748
|
-
|
|
2749
|
-
tool:
|
|
2750
|
-
input:
|
|
3201
|
+
import { Schema as Schema8 } from "effect";
|
|
3202
|
+
var ReActActionSchema = Schema8.Struct({
|
|
3203
|
+
thought: Schema8.String,
|
|
3204
|
+
action: Schema8.optional(
|
|
3205
|
+
Schema8.Struct({
|
|
3206
|
+
tool: Schema8.String,
|
|
3207
|
+
input: Schema8.Unknown
|
|
2751
3208
|
})
|
|
2752
3209
|
),
|
|
2753
|
-
finalAnswer:
|
|
2754
|
-
isComplete:
|
|
3210
|
+
finalAnswer: Schema8.optional(Schema8.String),
|
|
3211
|
+
isComplete: Schema8.Boolean
|
|
2755
3212
|
});
|
|
2756
|
-
var PlanSchema =
|
|
2757
|
-
goal:
|
|
2758
|
-
steps:
|
|
2759
|
-
|
|
2760
|
-
id:
|
|
2761
|
-
description:
|
|
2762
|
-
tool:
|
|
2763
|
-
dependsOn:
|
|
2764
|
-
estimatedDuration:
|
|
3213
|
+
var PlanSchema = Schema8.Struct({
|
|
3214
|
+
goal: Schema8.String,
|
|
3215
|
+
steps: Schema8.Array(
|
|
3216
|
+
Schema8.Struct({
|
|
3217
|
+
id: Schema8.Number,
|
|
3218
|
+
description: Schema8.String,
|
|
3219
|
+
tool: Schema8.optional(Schema8.String),
|
|
3220
|
+
dependsOn: Schema8.optional(Schema8.Array(Schema8.Number)),
|
|
3221
|
+
estimatedDuration: Schema8.optional(Schema8.String)
|
|
2765
3222
|
})
|
|
2766
3223
|
)
|
|
2767
3224
|
});
|
|
2768
|
-
var ReflectionSchema =
|
|
2769
|
-
taskAccomplished:
|
|
2770
|
-
confidence:
|
|
2771
|
-
strengths:
|
|
2772
|
-
weaknesses:
|
|
2773
|
-
needsRefinement:
|
|
2774
|
-
refinementSuggestions:
|
|
3225
|
+
var ReflectionSchema = Schema8.Struct({
|
|
3226
|
+
taskAccomplished: Schema8.Boolean,
|
|
3227
|
+
confidence: Schema8.Number,
|
|
3228
|
+
strengths: Schema8.Array(Schema8.String),
|
|
3229
|
+
weaknesses: Schema8.Array(Schema8.String),
|
|
3230
|
+
needsRefinement: Schema8.Boolean,
|
|
3231
|
+
refinementSuggestions: Schema8.optional(Schema8.Array(Schema8.String))
|
|
2775
3232
|
});
|
|
2776
|
-
var StrategySelectionSchema =
|
|
2777
|
-
selectedStrategy:
|
|
2778
|
-
reasoning:
|
|
2779
|
-
confidence:
|
|
2780
|
-
alternativeStrategies:
|
|
2781
|
-
|
|
2782
|
-
strategy:
|
|
2783
|
-
whyNot:
|
|
3233
|
+
var StrategySelectionSchema = Schema8.Struct({
|
|
3234
|
+
selectedStrategy: Schema8.String,
|
|
3235
|
+
reasoning: Schema8.String,
|
|
3236
|
+
confidence: Schema8.Number,
|
|
3237
|
+
alternativeStrategies: Schema8.Array(
|
|
3238
|
+
Schema8.Struct({
|
|
3239
|
+
strategy: Schema8.String,
|
|
3240
|
+
whyNot: Schema8.String
|
|
2784
3241
|
})
|
|
2785
3242
|
)
|
|
2786
3243
|
});
|
|
2787
|
-
var ThoughtEvaluationSchema =
|
|
2788
|
-
score:
|
|
2789
|
-
reasoning:
|
|
2790
|
-
strengths:
|
|
2791
|
-
weaknesses:
|
|
2792
|
-
shouldExpand:
|
|
3244
|
+
var ThoughtEvaluationSchema = Schema8.Struct({
|
|
3245
|
+
score: Schema8.Number,
|
|
3246
|
+
reasoning: Schema8.String,
|
|
3247
|
+
strengths: Schema8.Array(Schema8.String),
|
|
3248
|
+
weaknesses: Schema8.Array(Schema8.String),
|
|
3249
|
+
shouldExpand: Schema8.Boolean
|
|
2793
3250
|
});
|
|
2794
|
-
var ComplexityAnalysisSchema =
|
|
2795
|
-
score:
|
|
2796
|
-
factors:
|
|
2797
|
-
|
|
2798
|
-
factor:
|
|
2799
|
-
weight:
|
|
2800
|
-
reasoning:
|
|
3251
|
+
var ComplexityAnalysisSchema = Schema8.Struct({
|
|
3252
|
+
score: Schema8.Number,
|
|
3253
|
+
factors: Schema8.Array(
|
|
3254
|
+
Schema8.Struct({
|
|
3255
|
+
factor: Schema8.String,
|
|
3256
|
+
weight: Schema8.Number,
|
|
3257
|
+
reasoning: Schema8.String
|
|
2801
3258
|
})
|
|
2802
3259
|
),
|
|
2803
|
-
recommendedStrategy:
|
|
2804
|
-
recommendedModel:
|
|
3260
|
+
recommendedStrategy: Schema8.String,
|
|
3261
|
+
recommendedModel: Schema8.String
|
|
2805
3262
|
});
|
|
2806
3263
|
|
|
2807
3264
|
// src/runtime.ts
|
|
2808
|
-
import { Layer as
|
|
3265
|
+
import { Layer as Layer9 } from "effect";
|
|
2809
3266
|
/**
 * Builds the provider layer (LLM service + prompt manager) for a named provider.
 *
 * - `"test"` returns the in-memory test service and ignores `model`.
 * - `"anthropic"`, `"openai"`, `"gemini"` and `"litellm"` select their live layers.
 * - Any other value falls back to `LocalProviderLive`.
 *
 * @param {string} [provider="anthropic"] Provider name.
 * @param {object} [testResponses] Canned responses, used only when `provider` is `"test"`.
 * @param {string} [model] When truthy, overrides `defaultModel` on top of the
 *   environment-derived config; otherwise the environment config layer is used as-is.
 * @returns The provider layer merged with `PromptManagerLive`.
 */
var createLLMProviderLayer = (provider = "anthropic", testResponses, model) => {
  if (provider === "test") {
    return Layer9.mergeAll(
      TestLLMServiceLayer(testResponses ?? {}),
      PromptManagerLive
    );
  }
  let configLayer;
  if (model) {
    configLayer = Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, defaultModel: model }));
  } else {
    configLayer = LLMConfigFromEnv;
  }
  let providerLayer;
  switch (provider) {
    case "anthropic":
      providerLayer = AnthropicProviderLive;
      break;
    case "openai":
      providerLayer = OpenAIProviderLive;
      break;
    case "gemini":
      providerLayer = GeminiProviderLive;
      break;
    case "litellm":
      providerLayer = LiteLLMProviderLive;
      break;
    default:
      providerLayer = LocalProviderLive;
  }
  const configuredProvider = providerLayer.pipe(Layer9.provide(configLayer));
  return Layer9.mergeAll(configuredProvider, PromptManagerLive);
};
|
|
2823
3280
|
/**
 * Builds the provider layer (LLM service + prompt manager) from an explicit config
 * value instead of the environment-derived one.
 *
 * `"anthropic"`, `"openai"`, `"gemini"` and `"litellm"` select their live layers;
 * any other value falls back to `LocalProviderLive`.
 *
 * @param {object} config Value provided for the `LLMConfig` service.
 * @param {string} [provider="anthropic"] Provider name.
 * @returns The provider layer merged with `PromptManagerLive`.
 */
var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
  const configLayer = Layer9.succeed(LLMConfig, config);
  let providerLayer;
  switch (provider) {
    case "anthropic":
      providerLayer = AnthropicProviderLive;
      break;
    case "openai":
      providerLayer = OpenAIProviderLive;
      break;
    case "gemini":
      providerLayer = GeminiProviderLive;
      break;
    case "litellm":
      providerLayer = LiteLLMProviderLive;
      break;
    default:
      providerLayer = LocalProviderLive;
  }
  const configuredProvider = providerLayer.pipe(Layer9.provide(configLayer));
  return Layer9.mergeAll(configuredProvider, PromptManagerLive);
};
|
|
@@ -2847,6 +3304,7 @@ export {
|
|
|
2847
3304
|
LLMRateLimitError,
|
|
2848
3305
|
LLMService,
|
|
2849
3306
|
LLMTimeoutError,
|
|
3307
|
+
LiteLLMProviderLive,
|
|
2850
3308
|
LocalProviderLive,
|
|
2851
3309
|
ModelConfigSchema,
|
|
2852
3310
|
ModelPresets,
|