@reactive-agents/llm-provider 0.5.0 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1123,16 +1123,27 @@ var init_dist = __esm({
1123
1123
  // src/types.ts
1124
1124
  import { Schema } from "effect";
1125
1125
// Closed set of supported LLM provider identifiers.
var LLMProviderType = Schema.Literal(
  "anthropic", // Claude models via the Anthropic API (needs ANTHROPIC_API_KEY)
  "openai",    // GPT models via the OpenAI API (needs OPENAI_API_KEY)
  "ollama",    // local models served by a running Ollama instance
  "gemini",    // Google Gemini models (needs GOOGLE_API_KEY)
  "litellm",   // LiteLLM proxy — one gateway in front of many providers
  "custom"     // user-supplied adapter implementing the LLMService interface
);
// Configuration for an embedding model.
var EmbeddingConfigSchema = Schema.Struct({
  // Embedding model name, e.g. "text-embedding-3-small".
  model: Schema.String,
  // Dimensionality of the produced embedding vectors.
  dimensions: Schema.Number,
  // Which provider hosts the embedding model.
  provider: Schema.Literal("openai", "ollama"),
  // Max texts embedded per API call; callers default this to 100.
  batchSize: Schema.optional(Schema.Number)
});
1138
1149
  var DefaultEmbeddingConfig = {
@@ -1142,110 +1153,176 @@ var DefaultEmbeddingConfig = {
1142
1153
  batchSize: 100
1143
1154
  };
1144
1155
// Per-request model configuration.
var ModelConfigSchema = Schema.Struct({
  // Which provider to route the request to.
  provider: LLMProviderType,
  // Provider-specific model name/identifier.
  model: Schema.String,
  // Cap on tokens generated in the response.
  maxTokens: Schema.optional(Schema.Number),
  // Sampling temperature (0.0–1.0).
  temperature: Schema.optional(Schema.Number),
  // Nucleus-sampling probability mass.
  topP: Schema.optional(Schema.Number),
  // Sequences that halt generation when emitted.
  stopSequences: Schema.optional(Schema.Array(Schema.String))
});
1152
1169
/**
 * Built-in model presets. Each entry carries provider routing info plus
 * pricing (USD per 1M input/output tokens), the context-window size in
 * tokens, and a relative quality score in [0, 1] used for model selection.
 */
var ModelPresets = {
  // Claude 3.5 Haiku — fast and cheap; best for simple, low-latency tasks.
  "claude-haiku": {
    provider: "anthropic",
    model: "claude-3-5-haiku-20241022",
    costPer1MInput: 1,
    costPer1MOutput: 5,
    maxContext: 200000,
    quality: 0.6
  },
  // Claude Sonnet 4 — balanced; good default for production agents and tool use.
  "claude-sonnet": {
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
    costPer1MInput: 3,
    costPer1MOutput: 15,
    maxContext: 200000,
    quality: 0.85
  },
  // Claude Sonnet 4.5 — stronger reasoning than Sonnet 4 at the same price point.
  "claude-sonnet-4-5": {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    costPer1MInput: 3,
    costPer1MOutput: 15,
    maxContext: 200000,
    quality: 0.9
  },
  // Claude Opus 4 — most capable and most expensive; 1M-token context.
  "claude-opus": {
    provider: "anthropic",
    model: "claude-opus-4-20250514",
    costPer1MInput: 15,
    costPer1MOutput: 75,
    maxContext: 1000000,
    quality: 1
  },
  // GPT-4o Mini — low-cost OpenAI model for high-throughput simple tasks.
  "gpt-4o-mini": {
    provider: "openai",
    model: "gpt-4o-mini",
    costPer1MInput: 0.15,
    costPer1MOutput: 0.6,
    maxContext: 128000,
    quality: 0.55
  },
  // GPT-4o — OpenAI flagship; strong reasoning and tool use.
  "gpt-4o": {
    provider: "openai",
    model: "gpt-4o",
    costPer1MInput: 2.5,
    costPer1MOutput: 10,
    maxContext: 128000,
    quality: 0.8
  },
  // Gemini 2.0 Flash — very fast and cheap with a 1M-token context.
  "gemini-2.0-flash": {
    provider: "gemini",
    model: "gemini-2.0-flash",
    costPer1MInput: 0.1,
    costPer1MOutput: 0.4,
    maxContext: 1000000,
    quality: 0.75
  },
  // Gemini 2.5 Pro Preview — stronger than Flash; competitive pricing.
  "gemini-2.5-pro": {
    provider: "gemini",
    model: "gemini-2.5-pro-preview-03-25",
    costPer1MInput: 1.25,
    costPer1MOutput: 10,
    maxContext: 1000000,
    quality: 0.95
  }
};
1218
1279
// Anthropic-style cache-control directive (only "ephemeral" is supported).
var CacheControlSchema = Schema.Struct({
  type: Schema.Literal("ephemeral")
});
// Where an image comes from and how it is encoded.
var ImageSourceSchema = Schema.Struct({
  // "base64" for inline-encoded data, "url" for an HTTP(S) reference.
  type: Schema.Literal("base64", "url"),
  // Accepted image MIME types.
  media_type: Schema.Literal(
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp"
  ),
  // Base64 payload or URL string, depending on `type`.
  data: Schema.String
});
// A plain-text content block, optionally cacheable.
var TextContentBlockSchema = Schema.Struct({
  type: Schema.Literal("text"),
  text: Schema.String,
  cache_control: Schema.optional(CacheControlSchema)
});
// An image content block.
var ImageContentBlockSchema = Schema.Struct({
  type: Schema.Literal("image"),
  source: ImageSourceSchema
});
// A tool invocation emitted by the model.
var ToolUseContentBlockSchema = Schema.Struct({
  type: Schema.Literal("tool_use"),
  // Unique identifier for this tool call.
  id: Schema.String,
  // Name of the tool being invoked.
  name: Schema.String,
  // JSON-compatible arguments for the tool.
  input: Schema.Unknown
});
// The result of executing a previously requested tool call.
var ToolResultContentBlockSchema = Schema.Struct({
  type: Schema.Literal("tool_result"),
  // The id of the tool_use block this result answers.
  tool_use_id: Schema.String,
  content: Schema.String
});
1251
1328
  var makeCacheable = (text) => ({
@@ -1254,32 +1331,51 @@ var makeCacheable = (text) => ({
1254
1331
  cache_control: { type: "ephemeral" }
1255
1332
  });
1256
1333
// Token accounting for a single request/response pair.
var TokenUsageSchema = Schema.Struct({
  // Tokens consumed by messages + system prompt.
  inputTokens: Schema.Number,
  // Tokens generated in the response.
  outputTokens: Schema.Number,
  // inputTokens + outputTokens.
  totalTokens: Schema.Number,
  // Estimated USD cost from provider pricing.
  estimatedCost: Schema.Number
});
// Why the model stopped generating.
var StopReasonSchema = Schema.Literal(
  "end_turn",      // natural completion — full response present
  "max_tokens",    // hit the maxTokens cap — output may be truncated
  "stop_sequence", // a configured stop sequence fired
  "tool_use"       // model wants a tool run — toolCalls is populated
);
// A tool made available to the model.
var ToolDefinitionSchema = Schema.Struct({
  // Identifier the model uses to invoke the tool.
  name: Schema.String,
  // Human-readable description shown to the model.
  description: Schema.String,
  // JSON-Schema-style description of the expected parameters.
  inputSchema: Schema.Record({ key: Schema.String, value: Schema.Unknown })
});
// A concrete tool invocation produced by the model.
var ToolCallSchema = Schema.Struct({
  // Model-generated unique call id.
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown
});
// Normalized completion result shared by every provider adapter.
var CompletionResponseSchema = Schema.Struct({
  // Response text (flattened — no content blocks).
  content: Schema.String,
  stopReason: StopReasonSchema,
  usage: TokenUsageSchema,
  // Model actually used (may differ from the one requested).
  model: Schema.String,
  // Present only when the model emitted tool calls.
  toolCalls: Schema.optional(Schema.Array(ToolCallSchema))
});
1285
1381
 
@@ -2068,12 +2164,34 @@ var mapOpenAIResponse = (response, model) => {
2068
2164
 
2069
2165
  // src/providers/local.ts
2070
2166
  import { Effect as Effect6, Layer as Layer5, Stream as Stream3, Schema as Schema4 } from "effect";
2071
- var toOllamaMessages = (messages) => messages.filter((m) => m.role !== "tool").map((m) => ({
2072
- role: m.role,
2073
- content: typeof m.content === "string" ? m.content : m.content.filter(
2074
- (b) => b.type === "text"
2075
- ).map((b) => b.text).join("")
2076
- }));
2167
/**
 * Convert internal messages to Ollama's chat format. Tool results pass
 * through as role "tool"; assistant messages keep their text and carry any
 * tool_use blocks as `tool_calls`; everything else is flattened to text.
 */
var toOllamaMessages = (messages) => {
  // Collapse a content value (string or block array) to its text parts.
  const flattenText = (content) => typeof content === "string"
    ? content
    : content.filter((b) => b.type === "text").map((b) => b.text).join("");
  return messages.map((m) => {
    if (m.role === "tool") {
      return { role: "tool", content: m.content };
    }
    if (m.role !== "assistant") {
      return { role: m.role, content: flattenText(m.content) };
    }
    // Assistant turns may contain tool_use blocks alongside text.
    const toolUses = typeof m.content === "string"
      ? []
      : m.content.filter((b) => b.type === "tool_use");
    const converted = { role: "assistant", content: flattenText(m.content) };
    if (toolUses.length > 0) {
      converted.tool_calls = toolUses.map((tc) => ({
        function: {
          name: tc.name,
          arguments: tc.input ?? {}
        }
      }));
    }
    return converted;
  });
};
2077
2195
  var toOllamaTools = (tools) => {
2078
2196
  if (!tools || tools.length === 0) return void 0;
2079
2197
  return tools.map((t) => ({
@@ -2657,10 +2775,349 @@ No markdown, no code fences, just raw JSON.`
2657
2775
  })
2658
2776
  );
2659
2777
 
2660
- // src/testing.ts
2778
+ // src/providers/litellm.ts
2661
2779
  import { Effect as Effect8, Layer as Layer7, Stream as Stream5, Schema as Schema6 } from "effect";
2780
/**
 * Convert internal messages to LiteLLM's OpenAI-compatible chat format.
 * Tool results become role "tool" with a tool_call_id; all other roles are
 * flattened to their concatenated text blocks.
 */
var toLiteLLMMessages = (messages) => {
  const textOf = (content) => typeof content === "string"
    ? content
    : content.filter((b) => b.type === "text").map((b) => b.text).join("");
  return messages.map((m) =>
    m.role === "tool"
      ? { role: "tool", tool_call_id: m.toolCallId, content: m.content }
      : { role: m.role, content: textOf(m.content) }
  );
};
2795
/**
 * Map an arbitrary thrown value to a typed LLM error for the LiteLLM
 * provider. HTTP 429 becomes a rate-limit error with a fixed 60s backoff
 * hint; anything else becomes a generic LLMError wrapping the cause.
 */
var toEffectError4 = (error) => {
  const { status, message } = error;
  if (status === 429) {
    return new LLMRateLimitError({
      message: message ?? "Rate limit exceeded",
      provider: "litellm",
      retryAfterMs: 60000
    });
  }
  return new LLMError({
    message: message ?? String(error),
    provider: "litellm",
    cause: error
  });
};
2810
/** Translate an internal tool definition to OpenAI/LiteLLM function format. */
var toLiteLLMTool = (tool) => {
  const { name, description, inputSchema } = tool;
  return {
    type: "function",
    function: { name, description, parameters: inputSchema }
  };
};
2818
/**
 * Normalize a LiteLLM (OpenAI-shaped) completion response into the
 * provider-agnostic CompletionResponse shape. Tool-call arguments arrive as
 * JSON strings; unparseable arguments are preserved under `{ raw }`.
 */
var mapLiteLLMResponse = (response, model) => {
  const choice = response.choices[0];
  const message = choice?.message;
  const rawToolCalls = message?.tool_calls;
  const hasToolCalls = rawToolCalls && rawToolCalls.length > 0;

  // Map OpenAI finish_reason onto our StopReason vocabulary. The presence
  // of tool calls forces "tool_use" regardless of finish_reason.
  let stopReason;
  if (choice?.finish_reason === "tool_calls" || hasToolCalls) {
    stopReason = "tool_use";
  } else if (choice?.finish_reason === "length") {
    stopReason = "max_tokens";
  } else {
    stopReason = "end_turn";
  }

  const parseArgs = (raw) => {
    try {
      return JSON.parse(raw);
    } catch {
      return { raw };
    }
  };
  const toolCalls = hasToolCalls
    ? rawToolCalls.map((tc) => ({
        id: tc.id,
        name: tc.function.name,
        input: parseArgs(tc.function.arguments)
      }))
    : void 0;

  const inputTokens = response.usage?.prompt_tokens ?? 0;
  const outputTokens = response.usage?.completion_tokens ?? 0;
  return {
    content: message?.content ?? "",
    stopReason,
    usage: {
      inputTokens,
      outputTokens,
      totalTokens: response.usage?.total_tokens ?? 0,
      estimatedCost: calculateCost(inputTokens, outputTokens, model)
    },
    // Prefer the model the server reports over the one we requested.
    model: response.model ?? model,
    toolCalls
  };
};
2850
/**
 * POST a JSON body to the LiteLLM proxy and return the parsed JSON reply.
 * Adds a Bearer Authorization header only when an API key is configured.
 * On non-2xx responses, throws an Error carrying the HTTP `status` so
 * toEffectError4 can classify it (e.g. 429 → rate limit).
 */
var liteLLMFetch = async (baseURL, path, body, apiKey) => {
  const headers = { "Content-Type": "application/json" };
  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }
  const res = await fetch(`${baseURL}${path}`, {
    method: "POST",
    headers,
    body: JSON.stringify(body)
  });
  if (res.ok) {
    return res.json();
  }
  // Best-effort body read for the error message; ignore read failures.
  const text = await res.text().catch(() => "");
  throw Object.assign(
    new Error(`LiteLLM ${res.status}: ${text || res.statusText}`),
    { status: res.status }
  );
};
2869
// LiteLLM provider layer: implements the LLMService interface against a
// LiteLLM proxy, which speaks an OpenAI-compatible REST API. Base URL and
// API key come from LLMConfig, then the LITELLM_BASE_URL / LITELLM_API_KEY
// environment variables, then a localhost default.
var LiteLLMProviderLive = Layer7.effect(
  LLMService,
  Effect8.gen(function* () {
    const config = yield* LLMConfig;
    const baseURL = config.litellmBaseUrl ?? process.env.LITELLM_BASE_URL ?? "http://localhost:4000";
    const apiKey = config.litellmApiKey ?? process.env.LITELLM_API_KEY ?? void 0;
    const defaultModel = config.defaultModel;
    return LLMService.of({
      // One-shot completion with retry, a 30s timeout, and typed errors.
      complete: (request) => Effect8.gen(function* () {
        // request.model may be a plain string or a ModelConfig object.
        const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
        const messages = toLiteLLMMessages(request.messages);
        if (request.systemPrompt) {
          // OpenAI-style APIs carry the system prompt as the first message.
          messages.unshift({ role: "system", content: request.systemPrompt });
        }
        const requestBody = {
          model,
          max_tokens: request.maxTokens ?? config.defaultMaxTokens,
          temperature: request.temperature ?? config.defaultTemperature,
          messages,
          stop: request.stopSequences ? [...request.stopSequences] : void 0
        };
        if (request.tools && request.tools.length > 0) {
          requestBody.tools = request.tools.map(toLiteLLMTool);
        }
        const response = yield* Effect8.tryPromise({
          try: () => liteLLMFetch(baseURL, "/chat/completions", requestBody, apiKey),
          catch: (error) => toEffectError4(error)
        });
        return mapLiteLLMResponse(response, model);
      }).pipe(
        Effect8.retry(retryPolicy),
        Effect8.timeout("30 seconds"),
        // Surface the effect-level timeout as our typed LLMTimeoutError.
        Effect8.catchTag(
          "TimeoutException",
          () => Effect8.fail(
            new LLMTimeoutError({
              message: "LLM request timed out",
              provider: "litellm",
              timeoutMs: 3e4
            })
          )
        )
      ),
      // Streaming completion via server-sent events (SSE) over fetch.
      // NOTE(review): unlike complete, this path applies no retryPolicy or
      // timeout, and does not forward request.tools or stopSequences —
      // confirm whether that is intentional.
      stream: (request) => Effect8.gen(function* () {
        const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
        return Stream5.async((emit) => {
          const doStream = async () => {
            try {
              const headers = {
                "Content-Type": "application/json"
              };
              if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
              const messages = toLiteLLMMessages(request.messages);
              if (request.systemPrompt) {
                messages.unshift({
                  role: "system",
                  content: request.systemPrompt
                });
              }
              const res = await fetch(`${baseURL}/chat/completions`, {
                method: "POST",
                headers,
                body: JSON.stringify({
                  model,
                  max_tokens: request.maxTokens ?? config.defaultMaxTokens,
                  temperature: request.temperature ?? config.defaultTemperature,
                  messages,
                  stream: true
                })
              });
              if (!res.ok || !res.body) {
                throw new Error(`LiteLLM stream error: ${res.status}`);
              }
              const reader = res.body.getReader();
              const decoder = new TextDecoder();
              // Partial SSE line carried over between network chunks.
              let buffer = "";
              // Accumulates all text deltas for the final content_complete event.
              let fullContent = "";
              while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                buffer += decoder.decode(value, { stream: true });
                const lines = buffer.split("\n");
                // Keep the trailing (possibly incomplete) line for next read.
                buffer = lines.pop() ?? "";
                for (const line of lines) {
                  const trimmed = line.trim();
                  if (!trimmed.startsWith("data:")) continue;
                  const data = trimmed.slice(5).trim();
                  if (data === "[DONE]") {
                    emit.single({
                      type: "content_complete",
                      content: fullContent
                    });
                    emit.end();
                    return;
                  }
                  try {
                    const chunk = JSON.parse(data);
                    const delta = chunk.choices[0]?.delta?.content;
                    if (delta) {
                      fullContent += delta;
                      emit.single({ type: "text_delta", text: delta });
                    }
                    if (chunk.choices[0]?.finish_reason) {
                      // Final chunk may carry usage totals (if the proxy sends them).
                      const inputTokens = chunk.usage?.prompt_tokens ?? 0;
                      const outputTokens = chunk.usage?.completion_tokens ?? 0;
                      emit.single({
                        type: "usage",
                        usage: {
                          inputTokens,
                          outputTokens,
                          totalTokens: inputTokens + outputTokens,
                          estimatedCost: calculateCost(
                            inputTokens,
                            outputTokens,
                            model
                          )
                        }
                      });
                    }
                  } catch {
                  }
                  // ^ malformed SSE chunks are skipped deliberately (best-effort).
                }
              }
            } catch (error) {
              const err = error;
              emit.fail(
                new LLMError({
                  message: err.message ?? String(error),
                  provider: "litellm",
                  cause: error
                })
              );
            }
          };
          // Fire-and-forget: the async pump reports back through `emit`.
          void doStream();
        });
      }),
      // Structured output: append the JSON schema to the prompt, then
      // parse + decode the reply, retrying with the parse error echoed back
      // to the model up to maxParseRetries extra times.
      completeStructured: (request) => Effect8.gen(function* () {
        const schemaStr = JSON.stringify(
          Schema6.encodedSchema(request.outputSchema),
          null,
          2
        );
        const messagesWithFormat = [
          ...request.messages,
          {
            role: "user",
            content: `
Respond with ONLY valid JSON matching this schema:
${schemaStr}

No markdown, no code fences, just raw JSON.`
          }
        ];
        let lastError = null;
        const maxRetries = request.maxParseRetries ?? 2;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
          // On retries, replay the failed output and ask the model to fix it.
          const msgs = attempt === 0 ? messagesWithFormat : [
            ...messagesWithFormat,
            {
              role: "assistant",
              content: String(lastError)
            },
            {
              role: "user",
              content: `That response was not valid JSON. The parse error was: ${String(lastError)}. Please try again with valid JSON only.`
            }
          ];
          const model = typeof request.model === "string" ? request.model : request.model?.model ?? defaultModel;
          const completeResult = yield* Effect8.tryPromise({
            try: () => liteLLMFetch(
              baseURL,
              "/chat/completions",
              {
                model,
                max_tokens: request.maxTokens ?? config.defaultMaxTokens,
                temperature: request.temperature ?? config.defaultTemperature,
                messages: toLiteLLMMessages(msgs)
              },
              apiKey
            ),
            catch: (error) => toEffectError4(error)
          });
          const response = mapLiteLLMResponse(
            completeResult,
            model
          );
          try {
            const parsed = JSON.parse(response.content);
            const decoded = Schema6.decodeUnknownEither(
              request.outputSchema
            )(parsed);
            if (decoded._tag === "Right") {
              return decoded.right;
            }
            // Schema mismatch: remember the decode error for the retry prompt.
            lastError = decoded.left;
          } catch (e) {
            // JSON.parse failure: remember it for the retry prompt.
            lastError = e;
          }
        }
        return yield* Effect8.fail(
          new LLMParseError({
            message: `Failed to parse structured output after ${maxRetries + 1} attempts`,
            rawOutput: String(lastError),
            expectedSchema: schemaStr
          })
        );
      }),
      // Batched embeddings through the proxy's /embeddings endpoint.
      embed: (texts, model) => Effect8.tryPromise({
        try: async () => {
          const embeddingModel = model ?? config.embeddingConfig.model;
          const batchSize = config.embeddingConfig.batchSize ?? 100;
          const results = [];
          // Sequential batches, sized by embeddingConfig.batchSize.
          for (let i = 0; i < texts.length; i += batchSize) {
            const batch = texts.slice(i, i + batchSize);
            const response = await liteLLMFetch(
              baseURL,
              "/embeddings",
              {
                model: embeddingModel,
                input: [...batch],
                dimensions: config.embeddingConfig.dimensions
              },
              apiKey
            );
            results.push(
              ...response.data.map((d) => d.embedding)
            );
          }
          return results;
        },
        catch: (error) => new LLMError({
          message: `Embedding failed: ${error}`,
          provider: "litellm",
          cause: error
        })
      }),
      // Local heuristic only — does not call the proxy's token counter.
      countTokens: (messages) => Effect8.gen(function* () {
        return yield* estimateTokenCount(messages);
      }),
      getModelConfig: () => Effect8.succeed({
        provider: "litellm",
        model: defaultModel
      })
    });
  })
);
3116
+
3117
+ // src/testing.ts
3118
+ import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
2662
3119
  var TestLLMService = (responses) => ({
2663
- complete: (request) => Effect8.gen(function* () {
3120
+ complete: (request) => Effect9.gen(function* () {
2664
3121
  const lastMessage = request.messages[request.messages.length - 1];
2665
3122
  const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
2666
3123
  const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
@@ -2692,8 +3149,8 @@ var TestLLMService = (responses) => ({
2692
3149
  model: "test-model"
2693
3150
  };
2694
3151
  }),
2695
- stream: (_request) => Effect8.succeed(
2696
- Stream5.make(
3152
+ stream: (_request) => Effect9.succeed(
3153
+ Stream6.make(
2697
3154
  { type: "text_delta", text: "Test " },
2698
3155
  { type: "text_delta", text: "response" },
2699
3156
  {
@@ -2711,7 +3168,7 @@ var TestLLMService = (responses) => ({
2711
3168
  }
2712
3169
  )
2713
3170
  ),
2714
- completeStructured: (request) => Effect8.gen(function* () {
3171
+ completeStructured: (request) => Effect9.gen(function* () {
2715
3172
  const lastMessage = request.messages[request.messages.length - 1];
2716
3173
  const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
2717
3174
  let responseContent = "Test response";
@@ -2722,109 +3179,109 @@ var TestLLMService = (responses) => ({
2722
3179
  }
2723
3180
  }
2724
3181
  const parsed = JSON.parse(responseContent);
2725
- return Schema6.decodeUnknownSync(request.outputSchema)(parsed);
3182
+ return Schema7.decodeUnknownSync(request.outputSchema)(parsed);
2726
3183
  }),
2727
- embed: (texts) => Effect8.succeed(
3184
+ embed: (texts) => Effect9.succeed(
2728
3185
  texts.map(() => new Array(768).fill(0).map(() => Math.random()))
2729
3186
  ),
2730
- countTokens: (messages) => Effect8.succeed(
3187
+ countTokens: (messages) => Effect9.succeed(
2731
3188
  messages.reduce(
2732
3189
  (sum, m) => sum + (typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100),
2733
3190
  0
2734
3191
  )
2735
3192
  ),
2736
- getModelConfig: () => Effect8.succeed({
3193
+ getModelConfig: () => Effect9.succeed({
2737
3194
  provider: "anthropic",
2738
3195
  model: "test-model"
2739
3196
  })
2740
3197
  });
2741
- var TestLLMServiceLayer = (responses = {}) => Layer7.succeed(LLMService, LLMService.of(TestLLMService(responses)));
3198
+ var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
2742
3199
 
2743
3200
  // src/structured-output.ts
2744
- import { Schema as Schema7 } from "effect";
2745
- var ReActActionSchema = Schema7.Struct({
2746
- thought: Schema7.String,
2747
- action: Schema7.optional(
2748
- Schema7.Struct({
2749
- tool: Schema7.String,
2750
- input: Schema7.Unknown
3201
+ import { Schema as Schema8 } from "effect";
3202
+ var ReActActionSchema = Schema8.Struct({
3203
+ thought: Schema8.String,
3204
+ action: Schema8.optional(
3205
+ Schema8.Struct({
3206
+ tool: Schema8.String,
3207
+ input: Schema8.Unknown
2751
3208
  })
2752
3209
  ),
2753
- finalAnswer: Schema7.optional(Schema7.String),
2754
- isComplete: Schema7.Boolean
3210
+ finalAnswer: Schema8.optional(Schema8.String),
3211
+ isComplete: Schema8.Boolean
2755
3212
  });
2756
- var PlanSchema = Schema7.Struct({
2757
- goal: Schema7.String,
2758
- steps: Schema7.Array(
2759
- Schema7.Struct({
2760
- id: Schema7.Number,
2761
- description: Schema7.String,
2762
- tool: Schema7.optional(Schema7.String),
2763
- dependsOn: Schema7.optional(Schema7.Array(Schema7.Number)),
2764
- estimatedDuration: Schema7.optional(Schema7.String)
3213
+ var PlanSchema = Schema8.Struct({
3214
+ goal: Schema8.String,
3215
+ steps: Schema8.Array(
3216
+ Schema8.Struct({
3217
+ id: Schema8.Number,
3218
+ description: Schema8.String,
3219
+ tool: Schema8.optional(Schema8.String),
3220
+ dependsOn: Schema8.optional(Schema8.Array(Schema8.Number)),
3221
+ estimatedDuration: Schema8.optional(Schema8.String)
2765
3222
  })
2766
3223
  )
2767
3224
  });
2768
- var ReflectionSchema = Schema7.Struct({
2769
- taskAccomplished: Schema7.Boolean,
2770
- confidence: Schema7.Number,
2771
- strengths: Schema7.Array(Schema7.String),
2772
- weaknesses: Schema7.Array(Schema7.String),
2773
- needsRefinement: Schema7.Boolean,
2774
- refinementSuggestions: Schema7.optional(Schema7.Array(Schema7.String))
3225
+ var ReflectionSchema = Schema8.Struct({
3226
+ taskAccomplished: Schema8.Boolean,
3227
+ confidence: Schema8.Number,
3228
+ strengths: Schema8.Array(Schema8.String),
3229
+ weaknesses: Schema8.Array(Schema8.String),
3230
+ needsRefinement: Schema8.Boolean,
3231
+ refinementSuggestions: Schema8.optional(Schema8.Array(Schema8.String))
2775
3232
  });
2776
- var StrategySelectionSchema = Schema7.Struct({
2777
- selectedStrategy: Schema7.String,
2778
- reasoning: Schema7.String,
2779
- confidence: Schema7.Number,
2780
- alternativeStrategies: Schema7.Array(
2781
- Schema7.Struct({
2782
- strategy: Schema7.String,
2783
- whyNot: Schema7.String
3233
+ var StrategySelectionSchema = Schema8.Struct({
3234
+ selectedStrategy: Schema8.String,
3235
+ reasoning: Schema8.String,
3236
+ confidence: Schema8.Number,
3237
+ alternativeStrategies: Schema8.Array(
3238
+ Schema8.Struct({
3239
+ strategy: Schema8.String,
3240
+ whyNot: Schema8.String
2784
3241
  })
2785
3242
  )
2786
3243
  });
2787
- var ThoughtEvaluationSchema = Schema7.Struct({
2788
- score: Schema7.Number,
2789
- reasoning: Schema7.String,
2790
- strengths: Schema7.Array(Schema7.String),
2791
- weaknesses: Schema7.Array(Schema7.String),
2792
- shouldExpand: Schema7.Boolean
3244
+ var ThoughtEvaluationSchema = Schema8.Struct({
3245
+ score: Schema8.Number,
3246
+ reasoning: Schema8.String,
3247
+ strengths: Schema8.Array(Schema8.String),
3248
+ weaknesses: Schema8.Array(Schema8.String),
3249
+ shouldExpand: Schema8.Boolean
2793
3250
  });
2794
- var ComplexityAnalysisSchema = Schema7.Struct({
2795
- score: Schema7.Number,
2796
- factors: Schema7.Array(
2797
- Schema7.Struct({
2798
- factor: Schema7.String,
2799
- weight: Schema7.Number,
2800
- reasoning: Schema7.String
3251
+ var ComplexityAnalysisSchema = Schema8.Struct({
3252
+ score: Schema8.Number,
3253
+ factors: Schema8.Array(
3254
+ Schema8.Struct({
3255
+ factor: Schema8.String,
3256
+ weight: Schema8.Number,
3257
+ reasoning: Schema8.String
2801
3258
  })
2802
3259
  ),
2803
- recommendedStrategy: Schema7.String,
2804
- recommendedModel: Schema7.String
3260
+ recommendedStrategy: Schema8.String,
3261
+ recommendedModel: Schema8.String
2805
3262
  });
2806
3263
 
2807
3264
  // src/runtime.ts
2808
- import { Layer as Layer8 } from "effect";
3265
+ import { Layer as Layer9 } from "effect";
2809
3266
/**
 * Build the merged Layer for a provider plus the prompt manager.
 * "test" wires the in-memory test service; any unrecognized provider
 * (including "ollama"/"custom") falls back to LocalProviderLive.
 * An explicit `model` overrides defaultModel from the env-derived config.
 */
var createLLMProviderLayer = (provider = "anthropic", testResponses, model) => {
  if (provider === "test") {
    return Layer9.mergeAll(
      TestLLMServiceLayer(testResponses ?? {}),
      PromptManagerLive
    );
  }
  const configLayer = model
    ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, defaultModel: model }))
    : LLMConfigFromEnv;
  // Lookup table instead of a ternary chain; hasOwn guards against
  // prototype keys so unknown providers still reach the local fallback.
  const liveLayers = {
    anthropic: AnthropicProviderLive,
    openai: OpenAIProviderLive,
    gemini: GeminiProviderLive,
    litellm: LiteLLMProviderLive
  };
  const providerLayer = Object.hasOwn(liveLayers, provider)
    ? liveLayers[provider]
    : LocalProviderLive;
  return Layer9.mergeAll(
    providerLayer.pipe(Layer9.provide(configLayer)),
    PromptManagerLive
  );
};
3280
/**
 * Like createLLMProviderLayer, but with an explicit LLMConfig instead of
 * env-derived configuration. No "test" branch here — callers wanting the
 * test service use createLLMProviderLayer.
 */
var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
  const configLayer = Layer9.succeed(LLMConfig, config);
  const liveLayers = {
    anthropic: AnthropicProviderLive,
    openai: OpenAIProviderLive,
    gemini: GeminiProviderLive,
    litellm: LiteLLMProviderLive
  };
  // Unknown providers (e.g. "ollama", "custom") fall back to the local layer.
  const providerLayer = Object.hasOwn(liveLayers, provider)
    ? liveLayers[provider]
    : LocalProviderLive;
  return Layer9.mergeAll(
    providerLayer.pipe(Layer9.provide(configLayer)),
    PromptManagerLive
  );
};
@@ -2847,6 +3304,7 @@ export {
2847
3304
  LLMRateLimitError,
2848
3305
  LLMService,
2849
3306
  LLMTimeoutError,
3307
+ LiteLLMProviderLive,
2850
3308
  LocalProviderLive,
2851
3309
  ModelConfigSchema,
2852
3310
  ModelPresets,