@reactive-agents/llm-provider 0.7.8 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1120,6 +1120,14 @@ var init_dist = __esm({
1120
1120
  }
1121
1121
  });
1122
1122
 
1123
+ // src/capabilities.ts
1124
/**
 * Baseline provider capability flags.
 *
 * Only streaming is switched on; tool calling, structured output and
 * log-probabilities are all off.
 * NOTE(review): presumably used as the fallback for providers that do not
 * expose their own `capabilities()` — confirm against the call sites.
 */
var DEFAULT_CAPABILITIES = {
  supportsToolCalling: false,
  supportsStreaming: true,
  supportsStructuredOutput: false,
  supportsLogprobs: false,
};
1130
+
1123
1131
  // src/types.ts
1124
1132
  import { Schema } from "effect";
1125
1133
  var LLMProviderType = Schema.Literal(
@@ -1378,7 +1386,24 @@ var CompletionResponseSchema = Schema.Struct({
1378
1386
  /** Tool calls emitted by the model (if any) */
1379
1387
  toolCalls: Schema.optional(Schema.Array(ToolCallSchema)),
1380
1388
  /** Internal reasoning from thinking models (e.g. <think> blocks from qwen3, DeepSeek-R1) */
1381
- thinking: Schema.optional(Schema.String)
1389
+ thinking: Schema.optional(Schema.String),
1390
+ /** Token-level log probabilities (when requested via logprobs in CompletionRequest) */
1391
+ logprobs: Schema.optional(
1392
+ Schema.Array(
1393
+ Schema.Struct({
1394
+ token: Schema.String,
1395
+ logprob: Schema.Number,
1396
+ topLogprobs: Schema.optional(
1397
+ Schema.Array(
1398
+ Schema.Struct({
1399
+ token: Schema.String,
1400
+ logprob: Schema.Number
1401
+ })
1402
+ )
1403
+ )
1404
+ })
1405
+ )
1406
+ )
1382
1407
  });
1383
1408
 
1384
1409
  // src/errors.ts
@@ -1418,12 +1443,16 @@ var llmConfigFromEnv = LLMConfig.of({
1418
1443
  provider: process.env.EMBEDDING_PROVIDER ?? "openai",
1419
1444
  batchSize: 100
1420
1445
  },
1421
- supportsPromptCaching: (process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514").startsWith("claude"),
1446
+ supportsPromptCaching: (() => {
1447
+ const m = process.env.LLM_DEFAULT_MODEL || "claude-sonnet-4-20250514";
1448
+ return m.startsWith("claude") || m.startsWith("gemini") || m.startsWith("gpt");
1449
+ })(),
1422
1450
  maxRetries: Number(process.env.LLM_MAX_RETRIES ?? 3),
1423
1451
  timeoutMs: Number(process.env.LLM_TIMEOUT_MS ?? 3e4),
1424
1452
  defaultMaxTokens: 4096,
1425
1453
  defaultTemperature: Number(process.env.LLM_DEFAULT_TEMPERATURE ?? 0.7),
1426
- observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full"
1454
+ observabilityVerbosity: process.env.LLM_OBSERVABILITY_VERBOSITY ?? "full",
1455
+ pricingRegistry: {}
1427
1456
  });
1428
1457
  var LLMConfigFromEnv = Layer.succeed(LLMConfig, llmConfigFromEnv);
1429
1458
 
@@ -1463,20 +1492,84 @@ var estimateTokenCount = (messages) => Effect2.sync(() => {
1463
1492
  }
1464
1493
  return totalTokens;
1465
1494
  });
1466
- var calculateCost = (inputTokens, outputTokens, model) => {
1495
/**
 * Built-in price table keyed by exact model id.
 *
 * Each entry is `{ input, output }`; `calculateCost` divides token counts by
 * 1e6 before multiplying, so the figures are prices per one million tokens.
 * Built once at module load instead of on every pricing lookup.
 */
var BUILTIN_MODEL_PRICING = {
  // ── Anthropic ──
  "claude-3-5-haiku-20241022": { input: 0.8, output: 4 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-sonnet-4-5-20250929": { input: 3, output: 15 },
  "claude-opus-4-20250514": { input: 15, output: 75 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-3-5-sonnet-20240620": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-3-sonnet-20240229": { input: 3, output: 15 },
  // ── OpenAI ──
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-2024-11-20": { input: 2.5, output: 10 },
  "gpt-4o-2024-08-06": { input: 2.5, output: 10 },
  "gpt-4o-2024-05-13": { input: 5, output: 15 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4o-mini-2024-07-18": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-4-turbo-2024-04-09": { input: 10, output: 30 },
  "gpt-4": { input: 30, output: 60 },
  "gpt-4-0613": { input: 30, output: 60 },
  "gpt-3.5-turbo": { input: 0.5, output: 1.5 },
  "o1": { input: 15, output: 60 },
  "o1-mini": { input: 3, output: 12 },
  "o1-preview": { input: 15, output: 60 },
  "o3": { input: 10, output: 40 },
  "o3-mini": { input: 1.1, output: 4.4 },
  "o4-mini": { input: 1.1, output: 4.4 },
  // ── Google Gemini ──
  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
  "gemini-2.5-flash": { input: 0.15, output: 0.6 },
  "gemini-2.5-flash-preview-05-20": { input: 0.15, output: 0.6 },
  "gemini-2.5-pro": { input: 1.25, output: 10 },
  "gemini-2.5-pro-preview-03-25": { input: 1.25, output: 10 },
  "gemini-2.5-pro-preview-05-06": { input: 1.25, output: 10 },
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  "gemini-embedding-001": { input: 0, output: 0 },
  // ── Meta Llama (via LiteLLM / cloud providers) ──
  "llama-3.1-405b": { input: 3, output: 3 },
  "llama-3.1-70b": { input: 0.88, output: 0.88 },
  "llama-3.1-8b": { input: 0.18, output: 0.18 },
  "llama-3.3-70b": { input: 0.88, output: 0.88 },
  // ── Mistral ──
  "mistral-large-latest": { input: 2, output: 6 },
  "mistral-small-latest": { input: 0.2, output: 0.6 },
  "codestral-latest": { input: 0.3, output: 0.9 }
};

/**
 * Resolve the `{ input, output }` price pair for a model.
 *
 * Resolution order:
 *   1. an explicit `pricing` override, when both `input` and `output` are set;
 *   2. an entry for `model` in the caller-supplied `registry`;
 *   3. an exact match in the built-in table;
 *   4. a tier guessed from keywords in the (lower-cased) model name;
 *   5. the mid-tier default `{ input: 3, output: 15 }`.
 *
 * @param {string} model - Model id to price.
 * @param {Record<string, {input: number, output: number}>} [registry] - Custom prices keyed by model id.
 * @param {{input?: number, output?: number}} [pricing] - Direct override for this call.
 * @returns {{input: number, output: number}} Price pair for the model.
 */
function getPricing(model, registry, pricing) {
  if (pricing?.input != null && pricing?.output != null) {
    return { input: pricing.input, output: pricing.output };
  }

  // Own-property lookups only: a model id such as "toString" or "constructor"
  // must not resolve to something inherited from Object.prototype.
  const ownEntry = (table, key) =>
    table && Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;

  const custom = ownEntry(registry, model);
  if (custom) return custom;

  const builtin = ownEntry(BUILTIN_MODEL_PRICING, model);
  if (builtin) return builtin;

  // Keyword heuristics for unknown models. The vendor name "gemini" itself
  // contains "mini", so it is removed first; otherwise every unlisted Gemini
  // model would be classified as a small-tier model.
  const tierName = model.toLowerCase().replace(/gemini/g, "");
  const has = (keyword) => tierName.includes(keyword);

  const smallTier = ["haiku", "flash", "mini", "small", "8b", "7b", "lite"];
  if (smallTier.some(has)) {
    return { input: 0.15, output: 0.6 };
  }

  // Plain "gpt-4…" counts as premium, but not the turbo / 4o- / mini variants.
  const premiumGpt4 = has("gpt-4") && !has("turbo") && !has("o-") && !has("mini");
  if (has("opus") || has("large") || has("405b") || premiumGpt4) {
    return { input: 15, output: 75 };
  }

  return { input: 3, output: 15 };
}
1559
/**
 * Estimate the cost of one completion, including prompt-cache discounts.
 *
 * Prices come from `getPricing(model, registry, pricing)` and are applied per
 * one million tokens. Cache counters are read from `usage` under the
 * provider-specific field names:
 *   - Anthropic: `cache_creation_input_tokens` (billed at 1.25x input price)
 *     and `cache_read_input_tokens` (0.1x);
 *   - OpenAI: `cached_tokens` (0.5x);
 *   - Gemini: `cached_content_token_count` (0.25x).
 *
 * Anthropic reports cache reads/writes *in addition to* `input_tokens`, so
 * they are billed on top of `inputTokens`. OpenAI and Gemini report cached
 * tokens as a *subset* of the prompt token count, so those are moved out of
 * the full-price bucket. The full-price bucket is never allowed below zero.
 *
 * @param {number} inputTokens - Prompt tokens as reported by the provider.
 * @param {number} outputTokens - Completion tokens as reported by the provider.
 * @param {string} model - Model id used for price lookup.
 * @param {object} [usage] - Optional cache counters (see field names above).
 * @param {object} [registry] - Optional custom price registry, passed to `getPricing`.
 * @param {{input?: number, output?: number}} [pricing] - Optional direct price override.
 * @returns {number} Estimated cost in the currency unit of the price table.
 */
var calculateCost = (inputTokens, outputTokens, model, usage, registry, pricing) => {
  const costs = getPricing(model, registry, pricing);
  const perMillion = (tokens, rate) => tokens / 1e6 * rate;

  const anthropicCacheRead = usage?.cache_read_input_tokens ?? 0;
  const anthropicCacheWrite = usage?.cache_creation_input_tokens ?? 0;
  const openaiCached = usage?.cached_tokens ?? 0;
  const geminiCached = usage?.cached_content_token_count ?? 0;

  // Only OpenAI/Gemini cached tokens are contained in inputTokens. Subtracting
  // the Anthropic counters too would under-bill and could go negative.
  const baseInputTokens = Math.max(0, inputTokens - openaiCached - geminiCached);

  const inputCost = perMillion(baseInputTokens, costs.input);
  const outputCost = perMillion(outputTokens, costs.output);
  const anthropicCacheWriteCost = perMillion(anthropicCacheWrite, costs.input) * 1.25;
  const anthropicCacheReadCost = perMillion(anthropicCacheRead, costs.input) * 0.1;
  const openaiCachedCost = perMillion(openaiCached, costs.input) * 0.5;
  const geminiCachedCost = perMillion(geminiCached, costs.input) * 0.25;

  return inputCost + outputCost + anthropicCacheWriteCost + anthropicCacheReadCost + openaiCachedCost + geminiCachedCost;
};
1481
1574
 
1482
1575
  // src/prompt-manager.ts
@@ -1614,13 +1707,14 @@ var toAnthropicMessages = (messages) => messages.filter((m) => m.role !== "syste
1614
1707
  )
1615
1708
  };
1616
1709
  });
1617
/**
 * Convert a tool definition into the shape sent in Anthropic's `tools` array.
 *
 * @param tool - Object with `name`, `description` and `inputSchema`.
 * @param cached - When true, an ephemeral `cache_control` marker is attached
 *   to the tool entry. Defaults to false.
 * @returns The Anthropic tool object.
 */
var toAnthropicTool = (tool, cached = false) => {
  const definition = {
    name: tool.name,
    description: tool.description,
    // "object" is only a default: a `type` inside inputSchema overrides it.
    input_schema: { type: "object", ...tool.inputSchema }
  };
  return cached ? { ...definition, cache_control: { type: "ephemeral" } } : definition;
};
1625
1719
  var toEffectError = (error, provider) => {
1626
1720
  const err = error;
@@ -1673,11 +1767,13 @@ var AnthropicProviderLive = Layer3.effect(
1673
1767
  system: buildSystemParam(request.systemPrompt),
1674
1768
  messages: toAnthropicMessages(request.messages),
1675
1769
  stop_sequences: request.stopSequences ? [...request.stopSequences] : void 0,
1676
- tools: request.tools?.map(toAnthropicTool)
1770
+ tools: request.tools?.map(
1771
+ (t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
1772
+ )
1677
1773
  }),
1678
1774
  catch: (error) => toEffectError(error, "anthropic")
1679
1775
  });
1680
- return mapAnthropicResponse(response, model);
1776
+ return mapAnthropicResponse(response, model, config.pricingRegistry);
1681
1777
  }).pipe(
1682
1778
  Effect4.retry(retryPolicy),
1683
1779
  Effect4.timeout("30 seconds"),
@@ -1701,10 +1797,24 @@ var AnthropicProviderLive = Layer3.effect(
1701
1797
  max_tokens: request.maxTokens ?? config.defaultMaxTokens,
1702
1798
  temperature: request.temperature ?? config.defaultTemperature,
1703
1799
  system: buildSystemParam(request.systemPrompt),
1704
- messages: toAnthropicMessages(request.messages)
1800
+ messages: toAnthropicMessages(request.messages),
1801
+ tools: request.tools?.map(
1802
+ (t, i) => toAnthropicTool(t, i === (request.tools?.length ?? 0) - 1)
1803
+ )
1705
1804
  });
1706
- stream.on("text", (text) => {
1707
- emit.single({ type: "text_delta", text });
1805
+ stream.on("streamEvent", (event) => {
1806
+ const e = event;
1807
+ if (e.type === "content_block_delta") {
1808
+ if (e.delta?.type === "text_delta" && e.delta.text) {
1809
+ emit.single({ type: "text_delta", text: e.delta.text });
1810
+ } else if (e.delta?.type === "input_json_delta" && e.delta.partial_json) {
1811
+ emit.single({ type: "tool_use_delta", input: e.delta.partial_json });
1812
+ }
1813
+ } else if (e.type === "content_block_start") {
1814
+ if (e.content_block?.type === "tool_use" && e.content_block.id && e.content_block.name) {
1815
+ emit.single({ type: "tool_use_start", id: e.content_block.id, name: e.content_block.name });
1816
+ }
1817
+ }
1708
1818
  });
1709
1819
  stream.on("finalMessage", (message) => {
1710
1820
  const msg = message;
@@ -1721,7 +1831,12 @@ var AnthropicProviderLive = Layer3.effect(
1721
1831
  estimatedCost: calculateCost(
1722
1832
  msg.usage.input_tokens,
1723
1833
  msg.usage.output_tokens,
1724
- model
1834
+ model,
1835
+ {
1836
+ cache_creation_input_tokens: msg.usage.cache_creation_input_tokens,
1837
+ cache_read_input_tokens: msg.usage.cache_read_input_tokens
1838
+ },
1839
+ config.pricingRegistry
1725
1840
  )
1726
1841
  }
1727
1842
  });
@@ -1867,11 +1982,17 @@ No markdown, no code fences, just raw JSON.`
1867
1982
  jsonSchemaEnforcement: false,
1868
1983
  prefillSupport: true,
1869
1984
  grammarConstraints: false
1985
+ }),
1986
+ capabilities: () => Effect4.succeed({
1987
+ supportsToolCalling: true,
1988
+ supportsStreaming: true,
1989
+ supportsStructuredOutput: true,
1990
+ supportsLogprobs: false
1870
1991
  })
1871
1992
  });
1872
1993
  })
1873
1994
  );
1874
- var mapAnthropicResponse = (response, model) => {
1995
+ var mapAnthropicResponse = (response, model, registry) => {
1875
1996
  const textContent = response.content.filter(
1876
1997
  (b) => b.type === "text"
1877
1998
  ).map((b) => b.text).join("");
@@ -1893,7 +2014,12 @@ var mapAnthropicResponse = (response, model) => {
1893
2014
  estimatedCost: calculateCost(
1894
2015
  response.usage.input_tokens,
1895
2016
  response.usage.output_tokens,
1896
- model
2017
+ model,
2018
+ {
2019
+ cache_creation_input_tokens: response.usage.cache_creation_input_tokens,
2020
+ cache_read_input_tokens: response.usage.cache_read_input_tokens
2021
+ },
2022
+ registry
1897
2023
  )
1898
2024
  },
1899
2025
  model: response.model ?? model,
@@ -1911,6 +2037,28 @@ var toOpenAIMessages = (messages) => messages.map((m) => {
1911
2037
  content: m.content
1912
2038
  };
1913
2039
  }
2040
+ if (m.role === "assistant" && typeof m.content !== "string") {
2041
+ const blocks = m.content;
2042
+ const textParts = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
2043
+ const toolUseBlocks = blocks.filter(
2044
+ (b) => b.type === "tool_use"
2045
+ );
2046
+ if (toolUseBlocks.length > 0) {
2047
+ return {
2048
+ role: "assistant",
2049
+ content: textParts || "",
2050
+ tool_calls: toolUseBlocks.map((tc) => ({
2051
+ id: tc.id,
2052
+ type: "function",
2053
+ function: {
2054
+ name: tc.name,
2055
+ arguments: typeof tc.input === "string" ? tc.input : JSON.stringify(tc.input)
2056
+ }
2057
+ }))
2058
+ };
2059
+ }
2060
+ return { role: "assistant", content: textParts };
2061
+ }
1914
2062
  return {
1915
2063
  role: m.role,
1916
2064
  content: typeof m.content === "string" ? m.content : m.content.filter(
@@ -1933,12 +2081,49 @@ var toEffectError2 = (error, provider) => {
1933
2081
  cause: error
1934
2082
  });
1935
2083
  };
1936
- var toOpenAITool = (tool) => ({
2084
/**
 * Decide from the model id whether tools should be sent with `strict: true`.
 *
 * Matching is case-insensitive. Returns true for ids starting with "o1", "o3"
 * or "o4", for any id containing "gpt-4o-mini", and for other "gpt-4o" ids
 * unless they are the "2024-05-13" snapshot.
 *
 * @param model - Model id.
 * @returns Whether strict tool schemas are used for this model.
 */
var isStrictToolCallingSupported = (model) => {
  const id = model.toLowerCase();

  if (["o1", "o3", "o4"].some((prefix) => id.startsWith(prefix))) return true;
  if (id.includes("gpt-4o-mini")) return true;
  if (!id.includes("gpt-4o")) return false;

  return id.includes("2024-08-06") || id.includes("2024-11-20") || !id.includes("2024-05-13");
};
2088
/**
 * Produce a copy of a JSON Schema that satisfies OpenAI strict tool calling.
 *
 * The input is never mutated (it is deep-cloned through JSON first). For every
 * object schema that declares `properties`:
 *   - `additionalProperties` is set to false;
 *   - every property is listed in `required`;
 *   - `default` is removed from each property;
 *   - a property that was not originally required is made nullable by
 *     wrapping its complete sub-schema as `anyOf: [<sub-schema>, {type:"null"}]`
 *     (only `description` stays on the wrapper).
 * Nested objects, array `items` and `anyOf` variants are processed
 * recursively.
 *
 * Wrapping the complete sub-schema keeps `properties`, `items` and `enum`
 * inside the non-null variant, so optional objects and arrays retain their
 * structure and `null` can validate.
 *
 * @param schema - JSON Schema for a tool's parameters. Non-object values are
 *   returned unchanged.
 * @returns The strict-mode copy of the schema.
 */
var toStrictToolSchema = (schema) => {
  if (!schema || typeof schema !== "object") return schema;

  const isSchemaObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  // Wrap an already-hardened optional property so it also accepts null.
  const makeNullable = (node) => {
    // Untyped, already-null or already-union schemas are left alone.
    if (!node.type || node.type === "null" || node.anyOf) return node;
    if (Array.isArray(node.type) && node.type.includes("null")) return node;

    const { description, ...inner } = node;
    // Object variants inside anyOf are always closed.
    if (inner.type === "object") inner.additionalProperties = false;
    const wrapped = { anyOf: [inner, { type: "null" }] };
    return description === void 0 ? wrapped : { description, ...wrapped };
  };

  // Apply the strict-mode rules to a cloned schema node, in place.
  const harden = (node) => {
    if (!isSchemaObject(node)) return node;

    if (Array.isArray(node.anyOf)) {
      node.anyOf = node.anyOf.map((variant) => {
        const hardened = harden(variant);
        if (isSchemaObject(hardened) && hardened.type === "object") {
          hardened.additionalProperties = false;
        }
        return hardened;
      });
    }

    if (node.type === "array" && isSchemaObject(node.items)) {
      node.items = harden(node.items);
    }

    if (node.type === "object" && isSchemaObject(node.properties)) {
      const originallyRequired = new Set(Array.isArray(node.required) ? node.required : []);
      const keys = Object.keys(node.properties);
      node.additionalProperties = false;
      node.required = keys;

      for (const key of keys) {
        const prop = node.properties[key];
        // Boolean or null property schemas carry nothing to rewrite.
        if (!isSchemaObject(prop)) continue;
        delete prop.default;
        // Harden nested structure first, then add nullability around it.
        const hardened = harden(prop);
        node.properties[key] = originallyRequired.has(key) ? hardened : makeNullable(hardened);
      }
    }

    return node;
  };

  return harden(JSON.parse(JSON.stringify(schema)));
};
2120
/**
 * Convert a tool definition into an OpenAI `tools` entry of type "function".
 *
 * @param tool - Object with `name`, `description` and `inputSchema`.
 * @param strict - When truthy, the schema is passed through
 *   `toStrictToolSchema` and the entry carries `strict`; otherwise the schema
 *   is sent unchanged and `strict` is `undefined`.
 * @returns The OpenAI tool object.
 */
var toOpenAITool = (tool, strict) => {
  const parameters = strict ? toStrictToolSchema(tool.inputSchema) : tool.inputSchema;
  return {
    type: "function",
    function: {
      name: tool.name,
      description: tool.description,
      parameters,
      // A falsy flag collapses to undefined; the key itself stays present.
      strict: strict || void 0
    }
  };
};
1944
2129
  var OpenAIProviderLive = Layer4.effect(
@@ -1970,14 +2155,21 @@ var OpenAIProviderLive = Layer4.effect(
1970
2155
  messages,
1971
2156
  stop: request.stopSequences ? [...request.stopSequences] : void 0
1972
2157
  };
2158
+ if (request.logprobs) {
2159
+ requestBody.logprobs = true;
2160
+ if (request.topLogprobs != null) {
2161
+ requestBody.top_logprobs = request.topLogprobs;
2162
+ }
2163
+ }
1973
2164
  if (request.tools && request.tools.length > 0) {
1974
- requestBody.tools = request.tools.map(toOpenAITool);
2165
+ const strict = isStrictToolCallingSupported(model);
2166
+ requestBody.tools = request.tools.map((t) => toOpenAITool(t, strict));
1975
2167
  }
1976
2168
  const response = yield* Effect5.tryPromise({
1977
2169
  try: () => client.chat.completions.create(requestBody),
1978
2170
  catch: (error) => toEffectError2(error, "openai")
1979
2171
  });
1980
- return mapOpenAIResponse(response, model);
2172
+ return mapOpenAIResponse(response, model, config.pricingRegistry);
1981
2173
  }).pipe(
1982
2174
  Effect5.retry(retryPolicy),
1983
2175
  Effect5.timeout("30 seconds"),
@@ -2009,38 +2201,71 @@ var OpenAIProviderLive = Layer4.effect(
2009
2201
  }
2010
2202
  return msgs;
2011
2203
  })(),
2012
- stream: true
2204
+ tools: request.tools && request.tools.length > 0 ? request.tools.map((t) => toOpenAITool(t, isStrictToolCallingSupported(model))) : void 0,
2205
+ stream: true,
2206
+ stream_options: { include_usage: true }
2013
2207
  });
2014
2208
  let fullContent = "";
2209
+ const toolCallAccum = /* @__PURE__ */ new Map();
2210
+ let finalUsage;
2015
2211
  for await (const chunk of stream) {
2016
2212
  const delta = chunk.choices[0]?.delta?.content;
2017
2213
  if (delta) {
2018
2214
  fullContent += delta;
2019
2215
  emit.single({ type: "text_delta", text: delta });
2020
2216
  }
2217
+ const toolDeltas = chunk.choices[0]?.delta?.tool_calls;
2218
+ if (toolDeltas) {
2219
+ for (const tc of toolDeltas) {
2220
+ const existing = toolCallAccum.get(tc.index);
2221
+ if (existing) {
2222
+ if (tc.function?.arguments) existing.arguments += tc.function.arguments;
2223
+ } else {
2224
+ toolCallAccum.set(tc.index, {
2225
+ id: tc.id ?? "",
2226
+ name: tc.function?.name ?? "",
2227
+ arguments: tc.function?.arguments ?? ""
2228
+ });
2229
+ if (tc.id && tc.function?.name) {
2230
+ emit.single({ type: "tool_use_start", id: tc.id, name: tc.function.name });
2231
+ }
2232
+ }
2233
+ if (tc.function?.arguments) {
2234
+ emit.single({ type: "tool_use_delta", input: tc.function.arguments });
2235
+ }
2236
+ }
2237
+ }
2238
+ if (chunk.usage) {
2239
+ finalUsage = chunk.usage;
2240
+ }
2021
2241
  if (chunk.choices[0]?.finish_reason) {
2022
2242
  emit.single({
2023
2243
  type: "content_complete",
2024
2244
  content: fullContent
2025
2245
  });
2026
- const inputTokens = chunk.usage?.prompt_tokens ?? 0;
2027
- const outputTokens = chunk.usage?.completion_tokens ?? 0;
2028
- emit.single({
2029
- type: "usage",
2030
- usage: {
2031
- inputTokens,
2032
- outputTokens,
2033
- totalTokens: inputTokens + outputTokens,
2034
- estimatedCost: calculateCost(
2035
- inputTokens,
2036
- outputTokens,
2037
- model
2038
- )
2039
- }
2040
- });
2041
- emit.end();
2042
2246
  }
2043
2247
  }
2248
+ const inputTokens = finalUsage?.prompt_tokens ?? 0;
2249
+ const outputTokens = finalUsage?.completion_tokens ?? 0;
2250
+ const cacheUsage = {
2251
+ cached_tokens: finalUsage?.prompt_tokens_details?.cached_tokens
2252
+ };
2253
+ emit.single({
2254
+ type: "usage",
2255
+ usage: {
2256
+ inputTokens,
2257
+ outputTokens,
2258
+ totalTokens: inputTokens + outputTokens,
2259
+ estimatedCost: calculateCost(
2260
+ inputTokens,
2261
+ outputTokens,
2262
+ model,
2263
+ cacheUsage,
2264
+ config.pricingRegistry
2265
+ )
2266
+ }
2267
+ });
2268
+ emit.end();
2044
2269
  } catch (error) {
2045
2270
  const err = error;
2046
2271
  emit.fail(
@@ -2105,7 +2330,8 @@ ${schemaStr}`
2105
2330
  });
2106
2331
  const response = mapOpenAIResponse(
2107
2332
  completeResult,
2108
- model
2333
+ model,
2334
+ config.pricingRegistry
2109
2335
  );
2110
2336
  try {
2111
2337
  const parsed = JSON.parse(response.content);
@@ -2167,11 +2393,17 @@ ${schemaStr}`
2167
2393
  jsonSchemaEnforcement: true,
2168
2394
  prefillSupport: false,
2169
2395
  grammarConstraints: false
2396
+ }),
2397
+ capabilities: () => Effect5.succeed({
2398
+ supportsToolCalling: true,
2399
+ supportsStreaming: true,
2400
+ supportsStructuredOutput: true,
2401
+ supportsLogprobs: true
2170
2402
  })
2171
2403
  });
2172
2404
  })
2173
2405
  );
2174
- var mapOpenAIResponse = (response, model) => {
2406
+ var mapOpenAIResponse = (response, model, registry) => {
2175
2407
  const message = response.choices[0]?.message;
2176
2408
  const content = message?.content ?? "";
2177
2409
  const rawToolCalls = message?.tool_calls;
@@ -2190,6 +2422,17 @@ var mapOpenAIResponse = (response, model) => {
2190
2422
  input
2191
2423
  };
2192
2424
  }) : void 0;
2425
+ const rawLogprobs = response.choices[0]?.logprobs?.content;
2426
+ const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
2427
+ token: lp.token,
2428
+ logprob: lp.logprob,
2429
+ ...lp.top_logprobs ? {
2430
+ topLogprobs: lp.top_logprobs.map((tlp) => ({
2431
+ token: tlp.token,
2432
+ logprob: tlp.logprob
2433
+ }))
2434
+ } : {}
2435
+ })) : void 0;
2193
2436
  return {
2194
2437
  content,
2195
2438
  stopReason,
@@ -2200,11 +2443,16 @@ var mapOpenAIResponse = (response, model) => {
2200
2443
  estimatedCost: calculateCost(
2201
2444
  response.usage?.prompt_tokens ?? 0,
2202
2445
  response.usage?.completion_tokens ?? 0,
2203
- model
2446
+ model,
2447
+ {
2448
+ cached_tokens: response.usage?.prompt_tokens_details?.cached_tokens
2449
+ },
2450
+ registry
2204
2451
  )
2205
2452
  },
2206
2453
  model: response.model ?? model,
2207
- toolCalls
2454
+ toolCalls,
2455
+ ...logprobs ? { logprobs } : {}
2208
2456
  };
2209
2457
  };
2210
2458
 
@@ -2216,7 +2464,7 @@ var PROVIDER_DEFAULT_MODELS = {
2216
2464
  anthropic: "claude-sonnet-4-20250514",
2217
2465
  openai: "gpt-4o",
2218
2466
  ollama: "cogito:14b",
2219
- gemini: "gemini-2.0-flash",
2467
+ gemini: "gemini-2.5-flash",
2220
2468
  litellm: "gpt-4o",
2221
2469
  test: "test-model"
2222
2470
  };
@@ -2349,7 +2597,9 @@ var LocalProviderLive = Layer5.effect(
2349
2597
  options: {
2350
2598
  temperature: request.temperature ?? config.defaultTemperature,
2351
2599
  num_predict: request.maxTokens ?? config.defaultMaxTokens,
2352
- stop: request.stopSequences ? [...request.stopSequences] : void 0
2600
+ stop: request.stopSequences ? [...request.stopSequences] : void 0,
2601
+ ...request.logprobs ? { logprobs: true } : {},
2602
+ ...request.topLogprobs != null ? { top_logprobs: request.topLogprobs } : {}
2353
2603
  }
2354
2604
  });
2355
2605
  },
@@ -2363,6 +2613,17 @@ var LocalProviderLive = Layer5.effect(
2363
2613
  response.message?.tool_calls
2364
2614
  );
2365
2615
  const hasToolCalls = toolCalls && toolCalls.length > 0;
2616
+ const rawLogprobs = response.logprobs;
2617
+ const logprobs = rawLogprobs ? rawLogprobs.map((lp) => ({
2618
+ token: lp.token,
2619
+ logprob: lp.logprob,
2620
+ ...lp.top_logprobs ? {
2621
+ topLogprobs: lp.top_logprobs.map((tlp) => ({
2622
+ token: tlp.token,
2623
+ logprob: tlp.logprob
2624
+ }))
2625
+ } : {}
2626
+ })) : void 0;
2366
2627
  return {
2367
2628
  content,
2368
2629
  stopReason: hasToolCalls ? "tool_use" : response.done_reason === "stop" ? "end_turn" : response.done_reason === "length" ? "max_tokens" : "end_turn",
@@ -2375,7 +2636,8 @@ var LocalProviderLive = Layer5.effect(
2375
2636
  },
2376
2637
  model: response.model ?? model,
2377
2638
  toolCalls,
2378
- ...thinkingContent ? { thinking: thinkingContent } : {}
2639
+ ...thinkingContent ? { thinking: thinkingContent } : {},
2640
+ ...logprobs ? { logprobs } : {}
2379
2641
  };
2380
2642
  }).pipe(
2381
2643
  Effect6.retry(retryPolicy),
@@ -2409,6 +2671,7 @@ var LocalProviderLive = Layer5.effect(
2409
2671
  model,
2410
2672
  config.thinking
2411
2673
  );
2674
+ const wantLogprobs = request.logprobs ?? false;
2412
2675
  const stream = await client.chat({
2413
2676
  model,
2414
2677
  messages: msgs,
@@ -2418,10 +2681,13 @@ var LocalProviderLive = Layer5.effect(
2418
2681
  keep_alive: "5m",
2419
2682
  options: {
2420
2683
  temperature: request.temperature ?? config.defaultTemperature,
2421
- num_predict: request.maxTokens ?? config.defaultMaxTokens
2684
+ num_predict: request.maxTokens ?? config.defaultMaxTokens,
2685
+ ...wantLogprobs ? { logprobs: true } : {}
2422
2686
  }
2423
2687
  });
2424
2688
  let fullContent = "";
2689
+ const accumulatedLogprobs = [];
2690
+ const accumulatedToolCalls = [];
2425
2691
  for await (const chunk of stream) {
2426
2692
  if (chunk.message?.content) {
2427
2693
  fullContent += chunk.message.content;
@@ -2430,11 +2696,53 @@ var LocalProviderLive = Layer5.effect(
2430
2696
  text: chunk.message.content
2431
2697
  });
2432
2698
  }
2699
+ if (chunk.message?.tool_calls && Array.isArray(chunk.message.tool_calls)) {
2700
+ for (const tc of chunk.message.tool_calls) {
2701
+ const toolCall = {
2702
+ id: `ollama-tc-${Date.now()}-${accumulatedToolCalls.length}`,
2703
+ name: tc.function.name,
2704
+ input: tc.function.arguments
2705
+ };
2706
+ accumulatedToolCalls.push(toolCall);
2707
+ emit.single({
2708
+ type: "tool_use_start",
2709
+ id: toolCall.id,
2710
+ name: toolCall.name
2711
+ });
2712
+ emit.single({
2713
+ type: "tool_use_delta",
2714
+ input: JSON.stringify(tc.function.arguments)
2715
+ });
2716
+ }
2717
+ }
2718
+ if (wantLogprobs) {
2719
+ const chunkLp = chunk.logprobs;
2720
+ if (Array.isArray(chunkLp)) {
2721
+ for (const lp of chunkLp) {
2722
+ accumulatedLogprobs.push({
2723
+ token: lp.token,
2724
+ logprob: lp.logprob,
2725
+ ...lp.top_logprobs ? { topLogprobs: lp.top_logprobs.map((t) => ({ token: t.token, logprob: t.logprob })) } : {}
2726
+ });
2727
+ }
2728
+ }
2729
+ }
2433
2730
  if (chunk.done) {
2731
+ const hasToolCalls = accumulatedToolCalls.length > 0;
2732
+ const doneReason = chunk.done_reason;
2434
2733
  emit.single({
2435
2734
  type: "content_complete",
2436
- content: fullContent
2735
+ content: fullContent,
2736
+ ...hasToolCalls ? { stopReason: "tool_use" } : {
2737
+ stopReason: doneReason === "stop" ? "end_turn" : doneReason === "length" ? "max_tokens" : "end_turn"
2738
+ }
2437
2739
  });
2740
+ if (accumulatedLogprobs.length > 0) {
2741
+ emit.single({
2742
+ type: "logprobs",
2743
+ logprobs: accumulatedLogprobs
2744
+ });
2745
+ }
2438
2746
  emit.single({
2439
2747
  type: "usage",
2440
2748
  usage: {
@@ -2563,6 +2871,12 @@ No markdown, no code fences, just raw JSON.`
2563
2871
  jsonSchemaEnforcement: true,
2564
2872
  prefillSupport: false,
2565
2873
  grammarConstraints: true
2874
+ }),
2875
+ capabilities: () => Effect6.succeed({
2876
+ supportsToolCalling: true,
2877
+ supportsStreaming: true,
2878
+ supportsStructuredOutput: true,
2879
+ supportsLogprobs: false
2566
2880
  })
2567
2881
  });
2568
2882
  })
@@ -2579,7 +2893,7 @@ var toGeminiContents = (messages) => {
2579
2893
  role: "user",
2580
2894
  parts: [{
2581
2895
  functionResponse: {
2582
- name: "tool",
2896
+ name: msg.toolName ?? "unknown_tool",
2583
2897
  response: { content: msg.content }
2584
2898
  }
2585
2899
  }]
@@ -2601,7 +2915,7 @@ var toGeminiContents = (messages) => {
2601
2915
  } else if (block.type === "tool_result") {
2602
2916
  parts.push({
2603
2917
  functionResponse: {
2604
- name: "tool",
2918
+ name: block.name ?? "unknown_tool",
2605
2919
  response: { content: block.content }
2606
2920
  }
2607
2921
  });
@@ -2643,7 +2957,7 @@ var toEffectError3 = (error) => {
2643
2957
  cause: error
2644
2958
  });
2645
2959
  };
2646
- var mapGeminiResponse = (response, model) => {
2960
+ var mapGeminiResponse = (response, model, registry) => {
2647
2961
  const toolCalls = response.functionCalls?.map((fc, i) => ({
2648
2962
  id: `call_${i}`,
2649
2963
  name: fc.name,
@@ -2658,7 +2972,15 @@ var mapGeminiResponse = (response, model) => {
2658
2972
  inputTokens,
2659
2973
  outputTokens,
2660
2974
  totalTokens: inputTokens + outputTokens,
2661
- estimatedCost: calculateCost(inputTokens, outputTokens, model)
2975
+ estimatedCost: calculateCost(
2976
+ inputTokens,
2977
+ outputTokens,
2978
+ model,
2979
+ {
2980
+ cached_content_token_count: response.usageMetadata?.cachedContentTokenCount
2981
+ },
2982
+ registry
2983
+ )
2662
2984
  },
2663
2985
  model,
2664
2986
  toolCalls: toolCalls?.length ? toolCalls : void 0
@@ -2714,7 +3036,7 @@ var GeminiProviderLive = Layer6.effect(
2714
3036
  }),
2715
3037
  catch: toEffectError3
2716
3038
  });
2717
- return mapGeminiResponse(response, model);
3039
+ return mapGeminiResponse(response, model, config.pricingRegistry);
2718
3040
  }).pipe(
2719
3041
  Effect7.retry(retryPolicy),
2720
3042
  Effect7.timeout("30 seconds"),
@@ -2746,30 +3068,56 @@ var GeminiProviderLive = Layer6.effect(
2746
3068
  config: buildGeminiConfig({
2747
3069
  maxTokens: request.maxTokens,
2748
3070
  temperature: request.temperature,
2749
- systemPrompt
3071
+ systemPrompt,
3072
+ tools: request.tools
2750
3073
  })
2751
3074
  });
2752
3075
  let fullContent = "";
2753
3076
  let inputTokens = 0;
2754
3077
  let outputTokens = 0;
3078
+ let cachedContentTokens = 0;
3079
+ const accumulatedToolCalls = [];
2755
3080
  for await (const chunk of stream) {
2756
3081
  if (chunk.text) {
2757
3082
  emit.single({ type: "text_delta", text: chunk.text });
2758
3083
  fullContent += chunk.text;
2759
3084
  }
3085
+ const fcs = chunk.functionCalls;
3086
+ if (fcs && fcs.length > 0) {
3087
+ for (const fc of fcs) {
3088
+ const tcId = `gemini-tc-${Date.now()}-${accumulatedToolCalls.length}`;
3089
+ accumulatedToolCalls.push({ id: tcId, name: fc.name, input: fc.args });
3090
+ emit.single({ type: "tool_use_start", id: tcId, name: fc.name });
3091
+ emit.single({ type: "tool_use_delta", input: JSON.stringify(fc.args) });
3092
+ }
3093
+ }
2760
3094
  if (chunk.usageMetadata) {
2761
3095
  inputTokens = chunk.usageMetadata.promptTokenCount ?? 0;
2762
3096
  outputTokens = chunk.usageMetadata.candidatesTokenCount ?? 0;
3097
+ cachedContentTokens = chunk.usageMetadata.cachedContentTokenCount ?? 0;
2763
3098
  }
2764
3099
  }
2765
- emit.single({ type: "content_complete", content: fullContent });
3100
+ const hasToolCalls = accumulatedToolCalls.length > 0;
3101
+ emit.single({
3102
+ type: "content_complete",
3103
+ content: fullContent,
3104
+ ...hasToolCalls ? { stopReason: "tool_use", toolCalls: accumulatedToolCalls } : {}
3105
+ });
2766
3106
  emit.single({
2767
3107
  type: "usage",
2768
3108
  usage: {
2769
3109
  inputTokens,
2770
3110
  outputTokens,
2771
3111
  totalTokens: inputTokens + outputTokens,
2772
- estimatedCost: calculateCost(inputTokens, outputTokens, model)
3112
+ estimatedCost: calculateCost(
3113
+ inputTokens,
3114
+ outputTokens,
3115
+ model,
3116
+ {
3117
+ cached_content_token_count: cachedContentTokens || void 0
3118
+ },
3119
+ config.pricingRegistry
3120
+ )
2773
3121
  }
2774
3122
  });
2775
3123
  emit.end();
@@ -2831,7 +3179,7 @@ ${schemaStr}`
2831
3179
  }),
2832
3180
  catch: toEffectError3
2833
3181
  });
2834
- const mapped = mapGeminiResponse(response, model);
3182
+ const mapped = mapGeminiResponse(response, model, config.pricingRegistry);
2835
3183
  try {
2836
3184
  const parsed = JSON.parse(mapped.content);
2837
3185
  const decoded = Schema5.decodeUnknownEither(
@@ -2884,6 +3232,12 @@ ${schemaStr}`
2884
3232
  jsonSchemaEnforcement: false,
2885
3233
  prefillSupport: false,
2886
3234
  grammarConstraints: false
3235
+ }),
3236
+ capabilities: () => Effect7.succeed({
3237
+ supportsToolCalling: true,
3238
+ supportsStreaming: true,
3239
+ supportsStructuredOutput: true,
3240
+ supportsLogprobs: false
2887
3241
  })
2888
3242
  });
2889
3243
  })
@@ -2929,7 +3283,7 @@ var toLiteLLMTool = (tool) => ({
2929
3283
  parameters: tool.inputSchema
2930
3284
  }
2931
3285
  });
2932
- var mapLiteLLMResponse = (response, model) => {
3286
+ var mapLiteLLMResponse = (response, model, registry) => {
2933
3287
  const message = response.choices[0]?.message;
2934
3288
  const content = message?.content ?? "";
2935
3289
  const rawToolCalls = message?.tool_calls;
@@ -2954,7 +3308,14 @@ var mapLiteLLMResponse = (response, model) => {
2954
3308
  estimatedCost: calculateCost(
2955
3309
  response.usage?.prompt_tokens ?? 0,
2956
3310
  response.usage?.completion_tokens ?? 0,
2957
- model
3311
+ model,
3312
+ void 0,
3313
+ registry,
3314
+ // Prioritize costs returned directly from the proxy if available
3315
+ response.usage?.input_cost !== void 0 && response.usage?.output_cost !== void 0 ? {
3316
+ input: response.usage.input_cost / (response.usage.prompt_tokens || 1) * 1e6,
3317
+ output: response.usage.output_cost / (response.usage.completion_tokens || 1) * 1e6
3318
+ } : void 0
2958
3319
  )
2959
3320
  },
2960
3321
  model: response.model ?? model,
@@ -3008,7 +3369,11 @@ var LiteLLMProviderLive = Layer7.effect(
3008
3369
  try: () => liteLLMFetch(baseURL, "/chat/completions", requestBody, apiKey),
3009
3370
  catch: (error) => toEffectError4(error)
3010
3371
  });
3011
- return mapLiteLLMResponse(response, model);
3372
+ return mapLiteLLMResponse(
3373
+ response,
3374
+ model,
3375
+ config.pricingRegistry
3376
+ );
3012
3377
  }).pipe(
3013
3378
  Effect8.retry(retryPolicy),
3014
3379
  Effect8.timeout("30 seconds"),
@@ -3094,7 +3459,9 @@ var LiteLLMProviderLive = Layer7.effect(
3094
3459
  estimatedCost: calculateCost(
3095
3460
  inputTokens,
3096
3461
  outputTokens,
3097
- model
3462
+ model,
3463
+ void 0,
3464
+ config.pricingRegistry
3098
3465
  )
3099
3466
  }
3100
3467
  });
@@ -3165,7 +3532,8 @@ No markdown, no code fences, just raw JSON.`
3165
3532
  });
3166
3533
  const response = mapLiteLLMResponse(
3167
3534
  completeResult,
3168
- model
3535
+ model,
3536
+ config.pricingRegistry
3169
3537
  );
3170
3538
  try {
3171
3539
  const parsed = JSON.parse(response.content);
@@ -3229,6 +3597,12 @@ No markdown, no code fences, just raw JSON.`
3229
3597
  jsonSchemaEnforcement: false,
3230
3598
  prefillSupport: false,
3231
3599
  grammarConstraints: false
3600
+ }),
3601
+ capabilities: () => Effect8.succeed({
3602
+ supportsToolCalling: true,
3603
+ supportsStreaming: true,
3604
+ supportsStructuredOutput: true,
3605
+ supportsLogprobs: false
3232
3606
  })
3233
3607
  });
3234
3608
  })
@@ -3236,109 +3610,217 @@ No markdown, no code fences, just raw JSON.`
3236
3610
 
3237
3611
  // src/testing.ts
3238
3612
  import { Effect as Effect9, Layer as Layer8, Stream as Stream6, Schema as Schema7 } from "effect";
3239
- var TestLLMService = (responses) => ({
3240
- complete: (request) => Effect9.gen(function* () {
3241
- const lastMessage = request.messages[request.messages.length - 1];
3242
- const content = lastMessage && typeof lastMessage.content === "string" ? lastMessage.content : "";
3243
- const systemPrompt = typeof request.systemPrompt === "string" ? request.systemPrompt : "";
3244
- const searchText = `${content} ${systemPrompt}`;
3245
- for (const [pattern, response] of Object.entries(responses)) {
3246
- if (pattern.length > 0 && searchText.includes(pattern)) {
/**
 * Builds a synthetic usage record for the test provider.
 *
 * Token counts are approximated as one token per four characters (rounded up);
 * the estimated cost is always zero.
 *
 * @param inputLen - Character length of the prompt text.
 * @param outputLen - Character length of the generated text.
 * @returns Usage object with inputTokens, outputTokens, totalTokens and estimatedCost.
 */
function fakeUsage(inputLen, outputLen) {
  const charsToTokens = (chars) => Math.ceil(chars / 4);
  const inputTokens = charsToTokens(inputLen);
  const outputTokens = charsToTokens(outputLen);
  return {
    inputTokens,
    outputTokens,
    totalTokens: inputTokens + outputTokens,
    estimatedCost: 0
  };
}
/**
 * Produces the text that scenario guards are matched against.
 *
 * Joins the last message's content (only when it is a plain string) and the
 * request's system prompt (only when it is a plain string) with a single
 * space, then trims the result.
 *
 * @param messages - Conversation messages; only the final entry is inspected.
 * @param request - Request object; only `systemPrompt` is read.
 * @returns The trimmed search text (possibly empty).
 */
function extractSearchText(messages, request) {
  const tail = messages[messages.length - 1];
  const pieces = [
    tail && typeof tail.content === "string" ? tail.content : "",
    typeof request.systemPrompt === "string" ? request.systemPrompt : ""
  ];
  return pieces.join(" ").trim();
}
/**
 * Picks the scenario turn that answers the current call.
 *
 * Scans forward from the shared cursor and returns the first turn that has no
 * `match` guard or whose guard (compiled as a case-insensitive RegExp) matches
 * `searchText`. The cursor is then advanced past the matched turn but clamped
 * to the last index, so the final turn repeats once the scenario is exhausted.
 * When nothing matches, the last turn is returned and the cursor is untouched.
 *
 * An empty scenario yields a neutral `{ text: "" }` turn instead of
 * `undefined`, which previously made every caller crash on `"error" in turn`.
 *
 * @param scenario - Ordered list of scripted turns.
 * @param callIndex - Mutable cursor object of the form `{ value: number }`.
 * @param searchText - Text the `match` guards are tested against.
 * @returns `{ turn, matchedIndex }` for the selected turn.
 */
function resolveTurn(scenario, callIndex, searchText) {
  if (scenario.length === 0) {
    // Same neutral turn the test layer uses as its default scenario.
    return { turn: { text: "" }, matchedIndex: 0 };
  }
  const lastIndex = scenario.length - 1;
  for (let i = callIndex.value; i < scenario.length; i++) {
    const turn = scenario[i];
    const guard = turn.match;
    if (!guard || new RegExp(guard, "i").test(searchText)) {
      callIndex.value = Math.min(i + 1, lastIndex);
      return { turn, matchedIndex: i };
    }
  }
  return { turn: scenario[lastIndex], matchedIndex: lastIndex };
}
/**
 * Converts scripted tool-call specs into tool-call objects.
 *
 * Each result carries the spec's `name`, its `args` under `input`, and an id:
 * the spec's own `id` when present, otherwise `call-<matchedIndex>-<position>`.
 *
 * @param specs - Tool-call specs (`{ id?, name, args }`).
 * @param matchedIndex - Index of the scenario turn the specs came from.
 * @returns Array of `{ id, name, input }` objects, in spec order.
 */
function buildToolCalls(specs, matchedIndex) {
  const toCall = (spec, position) => {
    const id = spec.id ?? `call-${matchedIndex}-${position}`;
    return { id, name: spec.name, input: spec.args };
  };
  return specs.map(toCall);
}
/**
 * Scripted, in-memory LLM service for tests.
 *
 * `scenario` is an ordered list of turns. Each call to `complete`, `stream` or
 * `completeStructured` resolves one turn via `resolveTurn`; all three share a
 * single cursor. A turn may carry `error`, `toolCall`, `toolCalls`, `json` or
 * `text` (checked in that order of precedence).
 *
 * NOTE(review): `error` turns are raised with a plain `throw` inside
 * `Effect.gen`; confirm whether callers expect a typed failure instead.
 *
 * @param scenario - Ordered scripted turns.
 * @returns An object implementing the LLM service operations.
 */
var TestLLMService = (scenario) => {
  // Cursor shared by complete / stream / completeStructured.
  const callIndex = { value: 0 };

  // Resolve the turn for a request, keeping the search text for usage figures.
  const nextTurn = (request) => {
    const searchText = extractSearchText(request.messages, request);
    const { turn, matchedIndex } = resolveTurn(scenario, callIndex, searchText);
    return { searchText, turn, matchedIndex };
  };

  // Tool-call specs carried by a turn, or null when it is not a tool turn.
  const toolSpecsOf = (turn) => {
    if ("toolCall" in turn) return [turn.toolCall];
    if ("toolCalls" in turn) return turn.toolCalls;
    return null;
  };

  // Text reply of a turn: serialized `json`, else `text`, else empty.
  const replyTextOf = (turn) => {
    if ("json" in turn) return JSON.stringify(turn.json);
    return "text" in turn ? turn.text : "";
  };

  return {
    complete: (request) => Effect9.gen(function* () {
      const { searchText, turn, matchedIndex } = nextTurn(request);
      if ("error" in turn) {
        throw new Error(turn.error);
      }
      if ("toolCall" in turn || "toolCalls" in turn) {
        return {
          content: "",
          stopReason: "tool_use",
          usage: fakeUsage(searchText.length, 0),
          model: "test-model",
          toolCalls: buildToolCalls(toolSpecsOf(turn), matchedIndex)
        };
      }
      const content = replyTextOf(turn);
      return {
        content,
        stopReason: "end_turn",
        usage: fakeUsage(searchText.length, content.length),
        model: "test-model"
      };
    }),
    stream: (request) => {
      // The turn is resolved eagerly, when stream() is called.
      const { searchText, turn, matchedIndex } = nextTurn(request);
      if ("error" in turn) {
        return Effect9.succeed(
          Stream6.make(
            { type: "error", error: turn.error }
          )
        );
      }
      const specs = toolSpecsOf(turn);
      if (specs) {
        const toolEvents = buildToolCalls(specs, matchedIndex).flatMap((call) => [
          { type: "tool_use_start", id: call.id, name: call.name },
          { type: "tool_use_delta", input: JSON.stringify(call.input) }
        ]);
        const events = [
          ...toolEvents,
          { type: "content_complete", content: "" },
          { type: "usage", usage: fakeUsage(searchText.length, 0) }
        ];
        return Effect9.succeed(
          Stream6.fromIterable(events)
        );
      }
      const content = replyTextOf(turn);
      return Effect9.succeed(
        Stream6.make(
          { type: "text_delta", text: content },
          { type: "content_complete", content },
          { type: "usage", usage: fakeUsage(searchText.length, content.length) }
        )
      );
    },
    completeStructured: (request) => Effect9.gen(function* () {
      const { turn } = nextTurn(request);
      if ("error" in turn) {
        throw new Error(turn.error);
      }
      // `json` turns are returned as-is, without decoding against the schema.
      if ("json" in turn) {
        return turn.json;
      }
      const raw = "text" in turn ? turn.text : "{}";
      const decode = Schema7.decodeUnknownSync(request.outputSchema);
      return decode(JSON.parse(raw));
    }),
    // One 768-dimensional vector of random numbers per input text.
    embed: (texts) => Effect9.succeed(
      texts.map(() => Array.from({ length: 768 }, () => Math.random()))
    ),
    // ~4 characters per token for string content; flat 100 for anything else.
    countTokens: (messages) => {
      let total = 0;
      for (const m of messages) {
        total += typeof m.content === "string" ? Math.ceil(m.content.length / 4) : 100;
      }
      return Effect9.succeed(total);
    },
    getModelConfig: () => Effect9.succeed({
      provider: "anthropic",
      model: "test-model"
    }),
    getStructuredOutputCapabilities: () => Effect9.succeed({
      nativeJsonMode: true,
      jsonSchemaEnforcement: false,
      prefillSupport: false,
      grammarConstraints: false
    }),
    capabilities: () => Effect9.succeed({
      ...DEFAULT_CAPABILITIES,
      supportsToolCalling: true,
      // Test provider emits native FC stream events (tool_use_start/tool_use_delta)
      supportsStreaming: true
    })
  };
};
/**
 * Wraps `TestLLMService` in a Layer that provides `LLMService`.
 *
 * @param scenario - Scripted turns; defaults to a single empty text turn.
 * @returns A Layer supplying the scripted test service.
 */
var TestLLMServiceLayer = (scenario = [{ text: "" }]) => {
  const service = LLMService.of(TestLLMService(scenario));
  return Layer8.succeed(LLMService, service);
};
3771
+
3772
+ // src/pricing.ts
3773
+ import { Effect as Effect10 } from "effect";
/**
 * Pricing provider backed by the public OpenRouter model listing.
 *
 * `fetchPricing` downloads https://openrouter.ai/api/v1/models and builds a
 * registry keyed by model id, with `input`/`output` values equal to the listed
 * per-token prompt/completion prices multiplied by 1e6. Each model is also
 * registered under the last path segment of its id, unless that short name is
 * already taken.
 *
 * Fails with an `Error` when the request fails, the status is not OK, the body
 * is not JSON, or the payload has no `data` array. Entries without a string
 * id, or whose prices are missing, non-numeric or negative, are skipped rather
 * than stored as NaN or negative rates.
 */
var openRouterPricingProvider = {
  fetchPricing: () => Effect10.gen(function* () {
    const res = yield* Effect10.tryPromise({
      try: () => fetch("https://openrouter.ai/api/v1/models"),
      catch: (e) => new Error(`Fetch failed: ${e}`)
    });
    if (!res.ok) {
      return yield* Effect10.fail(new Error(`OpenRouter API returned ${res.status}`));
    }
    const json = yield* Effect10.tryPromise({
      try: () => res.json(),
      catch: (e) => new Error(`JSON parse failed: ${e}`)
    });
    const models = json?.data;
    if (!Array.isArray(models)) {
      // Surface a typed failure instead of letting `for...of` throw inside the generator.
      return yield* Effect10.fail(new Error("OpenRouter API returned an unexpected payload: missing data array"));
    }
    const registry = {};
    const has = (key) => Object.prototype.hasOwnProperty.call(registry, key);
    const isUsablePrice = (n) => Number.isFinite(n) && n >= 0;
    for (const model of models) {
      if (typeof model?.id !== "string") continue;
      const input = parseFloat(model.pricing?.prompt) * 1e6;
      const output = parseFloat(model.pricing?.completion) * 1e6;
      if (!isUsablePrice(input) || !isUsablePrice(output)) continue;
      registry[model.id] = { input, output };
      const shortName = model.id.split("/").pop();
      if (shortName && !has(shortName)) {
        registry[shortName] = registry[model.id];
      }
    }
    return registry;
  })
};
/**
 * Creates a pricing provider that loads a registry from a custom URL.
 *
 * The endpoint is expected to return a JSON object mapping model names to
 * `{ input, output }`; both values are converted with `Number(...)`.
 *
 * Fails with an `Error` when the request fails, the status is not OK, the body
 * is not JSON, or the payload is not a plain object. Entries that are not
 * objects, or whose prices do not convert to finite numbers, are skipped
 * rather than stored as NaN.
 *
 * @param url - Location of the JSON pricing document.
 * @returns A provider object exposing `fetchPricing`.
 */
var urlPricingProvider = (url) => ({
  fetchPricing: () => Effect10.gen(function* () {
    const res = yield* Effect10.tryPromise({
      try: () => fetch(url),
      catch: (e) => new Error(`Fetch failed: ${e}`)
    });
    if (!res.ok) {
      return yield* Effect10.fail(new Error(`Custom pricing URL returned ${res.status}`));
    }
    const json = yield* Effect10.tryPromise({
      try: () => res.json(),
      catch: (e) => new Error(`JSON parse failed: ${e}`)
    });
    if (json === null || typeof json !== "object" || Array.isArray(json)) {
      // Surface a typed failure instead of letting Object.entries throw inside the generator.
      return yield* Effect10.fail(new Error("Custom pricing URL returned an unexpected payload: expected a JSON object"));
    }
    const registry = {};
    for (const [key, value] of Object.entries(json)) {
      if (value === null || typeof value !== "object") continue;
      const input = Number(value.input);
      const output = Number(value.output);
      if (!Number.isFinite(input) || !Number.isFinite(output)) continue;
      registry[key] = { input, output };
    }
    return registry;
  })
});
3341
- var TestLLMServiceLayer = (responses = {}) => Layer8.succeed(LLMService, LLMService.of(TestLLMService(responses)));
3342
3824
 
3343
3825
  // src/structured-output.ts
3344
3826
  import { Schema as Schema8 } from "effect";
@@ -3405,10 +3887,10 @@ var ComplexityAnalysisSchema = Schema8.Struct({
3405
3887
  });
3406
3888
 
3407
3889
  // src/runtime.ts
3408
- import { Effect as Effect12, Layer as Layer9 } from "effect";
3890
+ import { Effect as Effect13, Layer as Layer9 } from "effect";
3409
3891
 
3410
3892
  // src/embedding-cache.ts
3411
- import { Effect as Effect10 } from "effect";
3893
+ import { Effect as Effect11 } from "effect";
3412
3894
  var MAX_ENTRIES = 5e3;
3413
3895
  var makeEmbeddingCache = (underlying) => {
3414
3896
  const caches = /* @__PURE__ */ new Map();
@@ -3432,7 +3914,7 @@ var makeEmbeddingCache = (underlying) => {
3432
3914
  }
3433
3915
  };
3434
3916
  return {
3435
- embed: (texts, model) => Effect10.gen(function* () {
3917
+ embed: (texts, model) => Effect11.gen(function* () {
3436
3918
  const modelKey = model ?? "__default__";
3437
3919
  const cache = getModelCache(modelKey);
3438
3920
  const results = new Array(texts.length);
@@ -3472,7 +3954,7 @@ var makeEmbeddingCache = (underlying) => {
3472
3954
  };
3473
3955
 
3474
3956
  // src/circuit-breaker.ts
3475
- import { Effect as Effect11 } from "effect";
3957
+ import { Effect as Effect12 } from "effect";
3476
3958
  var makeCircuitBreaker = (config = {}) => {
3477
3959
  const { failureThreshold, cooldownMs } = {
3478
3960
  ...defaultCircuitBreakerConfig,
@@ -3493,12 +3975,12 @@ var makeCircuitBreaker = (config = {}) => {
3493
3975
  }
3494
3976
  };
3495
3977
  return {
3496
- protect: (effect) => Effect11.gen(function* () {
3978
+ protect: (effect) => Effect12.gen(function* () {
3497
3979
  if (currentState === "open") {
3498
3980
  if (Date.now() - openedAt >= cooldownMs) {
3499
3981
  currentState = "half_open";
3500
3982
  } else {
3501
- return yield* Effect11.fail(
3983
+ return yield* Effect12.fail(
3502
3984
  new LLMError({
3503
3985
  message: `Circuit breaker OPEN \u2014 ${consecutiveFailures} consecutive failures. Retry after ${Math.ceil((cooldownMs - (Date.now() - openedAt)) / 1e3)}s cooldown.`,
3504
3986
  provider: "custom",
@@ -3507,13 +3989,13 @@ var makeCircuitBreaker = (config = {}) => {
3507
3989
  );
3508
3990
  }
3509
3991
  }
3510
- const result = yield* Effect11.exit(effect);
3992
+ const result = yield* Effect12.exit(effect);
3511
3993
  if (result._tag === "Success") {
3512
3994
  onSuccess();
3513
3995
  return result.value;
3514
3996
  }
3515
3997
  onFailure();
3516
- return yield* Effect11.failCause(result.cause);
3998
+ return yield* Effect12.failCause(result.cause);
3517
3999
  }),
3518
4000
  state: () => currentState,
3519
4001
  reset: () => {
@@ -3527,7 +4009,7 @@ var makeCircuitBreaker = (config = {}) => {
3527
4009
  // src/runtime.ts
3528
4010
  var EmbeddingCacheLayer = Layer9.effect(
3529
4011
  LLMService,
3530
- Effect12.gen(function* () {
4012
+ Effect13.gen(function* () {
3531
4013
  const llm = yield* LLMService;
3532
4014
  const cache = makeEmbeddingCache(llm.embed);
3533
4015
  return LLMService.of({ ...llm, embed: cache.embed });
@@ -3535,7 +4017,7 @@ var EmbeddingCacheLayer = Layer9.effect(
3535
4017
  );
3536
4018
  var makeCircuitBreakerLayer = (config) => Layer9.effect(
3537
4019
  LLMService,
3538
- Effect12.gen(function* () {
4020
+ Effect13.gen(function* () {
3539
4021
  const llm = yield* LLMService;
3540
4022
  const breaker = makeCircuitBreaker(config);
3541
4023
  return LLMService.of({
@@ -3545,10 +4027,10 @@ var makeCircuitBreakerLayer = (config) => Layer9.effect(
3545
4027
  });
3546
4028
  })
3547
4029
  );
3548
- var createLLMProviderLayer = (provider = "anthropic", testResponses, model, modelParams, circuitBreaker) => {
4030
+ var createLLMProviderLayer = (provider = "anthropic", testScenario, model, modelParams, circuitBreaker, pricingRegistry) => {
3549
4031
  if (provider === "test") {
3550
4032
  return Layer9.mergeAll(
3551
- TestLLMServiceLayer(testResponses ?? {}),
4033
+ TestLLMServiceLayer(testScenario ?? [{ text: "" }]),
3552
4034
  PromptManagerLive
3553
4035
  );
3554
4036
  }
@@ -3557,6 +4039,7 @@ var createLLMProviderLayer = (provider = "anthropic", testResponses, model, mode
3557
4039
  if (modelParams?.thinking !== void 0) configOverrides.thinking = modelParams.thinking;
3558
4040
  if (modelParams?.temperature !== void 0) configOverrides.defaultTemperature = modelParams.temperature;
3559
4041
  if (modelParams?.maxTokens !== void 0) configOverrides.defaultMaxTokens = modelParams.maxTokens;
4042
+ if (pricingRegistry) configOverrides.pricingRegistry = pricingRegistry;
3560
4043
  const configLayer = Object.keys(configOverrides).length > 0 ? Layer9.succeed(LLMConfig, LLMConfig.of({ ...llmConfigFromEnv, ...configOverrides })) : LLMConfigFromEnv;
3561
4044
  const providerLayer = provider === "anthropic" ? AnthropicProviderLive : provider === "openai" ? OpenAIProviderLive : provider === "gemini" ? GeminiProviderLive : provider === "litellm" ? LiteLLMProviderLive : LocalProviderLive;
3562
4045
  const baseProviderLayer = providerLayer.pipe(Layer9.provide(configLayer));
@@ -3577,13 +4060,331 @@ var createLLMProviderLayerWithConfig = (config, provider = "anthropic") => {
3577
4060
  PromptManagerLive
3578
4061
  );
3579
4062
  };
4063
+
4064
+ // src/rate-limiter.ts
4065
+ import { Effect as Effect14 } from "effect";
/** Limits applied when the caller does not override them. */
var DEFAULT_CONFIG = {
  requestsPerMinute: 60,
  tokensPerMinute: 1e5,
  maxConcurrent: 10
};

/**
 * Creates an in-memory rate limiter with a 60-second sliding window.
 *
 * Three limits are enforced by `acquire`: concurrent holders (`maxConcurrent`),
 * requests recorded in the window (`requestsPerMinute`) and estimated tokens
 * recorded in the window (`tokensPerMinute`). Callers must pair every
 * successful `acquire` with one `release`.
 *
 * @param config - Partial overrides merged over `DEFAULT_CONFIG`.
 * @returns `{ acquire, release, concurrentCount, windowRequestCount, windowTokenCount }`.
 */
var makeRateLimiter = (config = {}) => {
  const limits = { ...DEFAULT_CONFIG, ...config };
  const WINDOW_MS = 6e4;
  // Admitted requests, oldest first: { ts, tokens }.
  const admitted = [];
  let inFlight = 0;

  // Drop entries that are a full window (or more) old.
  const dropExpired = (now) => {
    const cutoff = now - WINDOW_MS;
    while (admitted.length > 0 && admitted[0].ts <= cutoff) {
      admitted.shift();
    }
  };

  // Sum of estimated tokens currently inside the window.
  const tokensInWindow = () => {
    let total = 0;
    for (const entry of admitted) {
      total += entry.tokens;
    }
    return total;
  };

  // Milliseconds until the oldest entry leaves the window.
  const msUntilOldestExpires = (now) => admitted[0].ts + WINDOW_MS - now;

  return {
    acquire: (messages) => Effect14.gen(function* () {
      const estimatedTokens = messages ? yield* estimateTokenCount(messages) : 0;
      for (;;) {
        const now = Date.now();
        dropExpired(now);
        // Concurrency is polled: there is no wake-up signal from release().
        if (inFlight >= limits.maxConcurrent) {
          yield* Effect14.sleep("100 millis");
          continue;
        }
        if (admitted.length >= limits.requestsPerMinute) {
          const waitMs = msUntilOldestExpires(now);
          if (waitMs > 0) {
            yield* Effect14.sleep(`${waitMs} millis`);
            continue;
          }
        }
        // With an empty window the request is admitted even if it alone exceeds the token budget.
        const overTokenBudget = estimatedTokens > 0 && tokensInWindow() + estimatedTokens > limits.tokensPerMinute && admitted.length > 0;
        if (overTokenBudget) {
          const waitMs = msUntilOldestExpires(now);
          if (waitMs > 0) {
            yield* Effect14.sleep(`${waitMs} millis`);
            continue;
          }
        }
        admitted.push({ ts: now, tokens: estimatedTokens });
        inFlight++;
        return;
      }
    }),
    release: () => {
      // Never drops below zero, so an extra release is harmless.
      if (inFlight > 0) inFlight--;
    },
    concurrentCount: () => inFlight,
    windowRequestCount: () => {
      dropExpired(Date.now());
      return admitted.length;
    },
    windowTokenCount: () => {
      dropExpired(Date.now());
      return tokensInWindow();
    }
  };
};
4130
+
4131
+ // src/rate-limited-provider.ts
4132
+ import { Effect as Effect15, Layer as Layer10 } from "effect";
/**
 * Layer that wraps the upstream `LLMService` with a rate limiter.
 *
 * `complete`, `stream` and `completeStructured` acquire a limiter slot before
 * delegating and release it afterwards. The release is attached with
 * `Effect.ensuring`, so it runs on success, failure and interruption. A
 * `try/finally` inside `Effect.gen` does not give that guarantee, because a
 * failing yielded effect never resumes the generator and the slot would leak.
 *
 * For `stream`, the slot is released once the effect producing the stream
 * completes, not when the stream has been fully consumed.
 *
 * @param config - Rate-limiter overrides forwarded to `makeRateLimiter`.
 * @returns A Layer providing the rate-limited `LLMService`.
 */
var makeRateLimitedProvider = (config = {}) => Layer10.effect(
  LLMService,
  Effect15.gen(function* () {
    const svc = yield* LLMService;
    const limiter = makeRateLimiter(config);
    const releaseSlot = Effect15.sync(() => limiter.release());
    // Acquire and run stay interruptible (restore); the hand-off between them does not,
    // so an interrupt cannot land after the slot is taken but before the finalizer is attached.
    const rateLimited = (call) => (req) => Effect15.uninterruptibleMask(
      (restore) => restore(limiter.acquire(req.messages)).pipe(
        Effect15.flatMap(() => Effect15.ensuring(restore(call(req)), releaseSlot))
      )
    );
    return {
      complete: rateLimited((req) => svc.complete(req)),
      stream: rateLimited((req) => svc.stream(req)),
      completeStructured: rateLimited((req) => svc.completeStructured(req)),
      // Passthrough — embedding, token counting, config, and capabilities are not rate-limited
      embed: svc.embed,
      countTokens: svc.countTokens,
      getModelConfig: svc.getModelConfig,
      getStructuredOutputCapabilities: svc.getStructuredOutputCapabilities,
      capabilities: svc.capabilities
    };
  })
);
4172
+
4173
+ // src/fallback-chain.ts
/**
 * Tracks provider errors and rate limits to decide which provider and model
 * should be used next.
 *
 * `config.providers` is the ordered provider list, `config.models` an optional
 * ordered model list, and `config.errorThreshold` (default 3) the number of
 * recorded errors after which the chain moves to the next provider.
 */
var FallbackChain = class {
  /** Configuration passed to the constructor. */
  config;
  /** Error count per provider. */
  errorCounts = /* @__PURE__ */ new Map();
  /** Current index in the providers list. */
  currentProviderIndex = 0;
  /** Current index in the models list. */
  currentModelIndex = 0;
  /** Threshold for switching to next provider. */
  threshold;
  constructor(config) {
    this.config = config;
    this.threshold = config.errorThreshold ?? 3;
  }
  /**
   * Record an error for the given provider.
   * Increments that provider's error count. The chain advances only when the
   * failing provider is the active one, its count has reached the threshold,
   * and a later provider exists. Errors reported for a provider that is no
   * longer active are counted but cannot skip the provider now in use.
   *
   * @param provider - Provider name that errored
   */
  recordError(provider) {
    const count = (this.errorCounts.get(provider) ?? 0) + 1;
    this.errorCounts.set(provider, count);
    const isActive = provider === this.config.providers[this.currentProviderIndex];
    const hasNext = this.currentProviderIndex < this.config.providers.length - 1;
    if (isActive && hasNext && count >= this.threshold) {
      this.currentProviderIndex++;
    }
  }
  /**
   * Record a rate limit error (429) for the given provider.
   * Falls back to the next model in the chain, if one is configured.
   *
   * @param _provider - Provider name that was rate limited (unused)
   */
  recordRateLimit(_provider) {
    if (this.config.models && this.currentModelIndex < this.config.models.length - 1) {
      this.currentModelIndex++;
    }
  }
  /**
   * Record a successful call for the given provider.
   * Resets the error count for that provider.
   *
   * @param provider - Provider name that succeeded
   */
  recordSuccess(provider) {
    this.errorCounts.set(provider, 0);
  }
  /**
   * Get the currently active provider.
   *
   * @returns Name of the provider to use
   * @throws Error when no provider exists at the current index (e.g. empty list)
   */
  currentProvider() {
    const provider = this.config.providers[this.currentProviderIndex];
    if (!provider) {
      throw new Error(`FallbackChain: Invalid provider index ${this.currentProviderIndex}`);
    }
    return provider;
  }
  /**
   * Get the currently active model.
   *
   * @returns Name of the model to use, or undefined if no models configured
   */
  currentModel() {
    return this.config.models?.[this.currentModelIndex];
  }
  /**
   * Check if there are more fallbacks available (provider or model).
   *
   * @returns true if there are unused fallback providers or models, false if all exhausted
   */
  hasFallback() {
    const hasProviderFallback = this.currentProviderIndex < this.config.providers.length - 1;
    const hasModelFallback = this.config.models !== void 0 && this.currentModelIndex < this.config.models.length - 1;
    return hasProviderFallback || hasModelFallback;
  }
};
4252
+
4253
+ // src/validation.ts
/**
 * Returns a copy of the conversation with common structural problems repaired.
 *
 * - assistant: tool-call ids (`tool_calls` or `toolCalls`) are remembered;
 *   missing or empty content becomes `""`.
 * - tool: dropped when it names a call id (`tool_call_id` or `toolCallId`)
 *   that no earlier assistant message issued; kept otherwise.
 * - user / system: missing, empty or whitespace-only content becomes `"..."`.
 * - any other role is passed through unchanged.
 *
 * Non-empty structured content (e.g. an array of content blocks) is preserved.
 * It was previously overwritten with `""` or `"..."` because only string
 * content was recognised.
 *
 * @param messages - Conversation messages in order.
 * @returns The same array when it is empty, otherwise a new repaired array.
 */
function validateAndRepairMessages(messages) {
  if (messages.length === 0) return messages;
  // "Blank" = nothing a provider could use: nullish, whitespace-only string, or empty array.
  const isBlank = (content) => {
    if (content === null || content === void 0) return true;
    if (typeof content === "string") return content.trim() === "";
    return Array.isArray(content) && content.length === 0;
  };
  const repaired = [];
  const toolCallIds = /* @__PURE__ */ new Set();
  for (const msg of messages) {
    if (msg.role === "assistant") {
      const toolCalls = msg.tool_calls ?? msg.toolCalls ?? [];
      for (const tc of toolCalls) {
        if (tc.id) toolCallIds.add(tc.id);
      }
      // Whitespace-only strings are kept; only nullish content and empty arrays become "".
      const needsEmptyString = typeof msg.content !== "string" && isBlank(msg.content);
      repaired.push({ ...msg, content: needsEmptyString ? "" : msg.content });
      continue;
    }
    if (msg.role === "tool") {
      const callId = msg.tool_call_id ?? msg.toolCallId;
      // Orphaned tool result: its call id was never issued by an assistant message.
      if (callId && !toolCallIds.has(callId)) {
        continue;
      }
      repaired.push(msg);
      continue;
    }
    if ((msg.role === "user" || msg.role === "system") && isBlank(msg.content)) {
      repaired.push({ ...msg, content: "..." });
      continue;
    }
    repaired.push(msg);
  }
  return repaired;
}
4288
+
4289
+ // src/adapter.ts
/**
 * Baseline prompt adapter; it only implements `continuationHint`.
 */
var defaultAdapter = {
  /**
   * Hint injected between iterations.
   * With tools still missing it lists them, adding a countdown when three or
   * fewer iterations remain. With nothing missing it asks for the final answer
   * if at least one tool was used, and otherwise returns undefined.
   */
  continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
    if (missingTools.length > 0) {
      const nearLimit = iteration >= maxIterations - 3;
      const urgency = nearLimit ? ` You have ${maxIterations - iteration} iterations left.` : "";
      const toolList = missingTools.join(", ");
      return `You must still call: ${toolList}. Call the next required tool now.${urgency}`;
    }
    if (toolsUsed.size > 0) {
      return "You have completed all required tool calls. Now synthesize the results and provide your FINAL ANSWER.";
    }
    return void 0;
  }
};
/**
 * Prompt adapter for the "local" model tier.
 *
 * Every hook except `continuationHint` returns undefined unless `tier` is
 * "local"; `continuationHint` receives no tier and always applies.
 */
var localModelAdapter = {
  /** Appends a "complete all steps" instruction to the base system prompt. */
  systemPromptPatch(basePrompt, tier) {
    if (tier === "local") {
      return basePrompt + "\n\nIMPORTANT: When given a multi-step task, complete ALL steps in sequence. After gathering information, immediately proceed to the next step. Never stop after only searching \u2014 always produce the deliverable.";
    }
    return void 0;
  },
  /** Rewrites the task as a numbered list of required tool calls. */
  taskFraming({ task, requiredTools, tier }) {
    if (tier !== "local" || requiredTools.length === 0) return void 0;
    const numbered = requiredTools.map((tool, index) => `${index + 1}. Call ${tool}`);
    return [
      `${task}`,
      "",
      "Complete these steps in order:",
      numbered.join("\n"),
      "Do not stop until all steps are done."
    ].join("\n");
  },
  /** Lists the tools that must be called before a final answer. */
  toolGuidance({ requiredTools, tier }) {
    if (tier !== "local" || requiredTools.length === 0) return void 0;
    const toolList = requiredTools.join(", ");
    return `\nRequired tools for this task: ${toolList}. You MUST call all of them before giving a final answer.`;
  },
  /**
   * Nudges the model toward the next missing tool. After a search/http tool it
   * steers straight to the first missing write/file tool, if there is one.
   */
  continuationHint({ missingTools, iteration, maxIterations, lastToolName }) {
    if (missingTools.length === 0) return void 0;
    const urgency = iteration >= maxIterations - 2 ? " This is urgent \u2014 you are running low on iterations." : "";
    const justResearched = Boolean(lastToolName) && (lastToolName.includes("search") || lastToolName.includes("http"));
    if (justResearched) {
      const firstWriteTool = missingTools.find((t) => t.includes("write") || t.includes("file"));
      if (firstWriteTool !== void 0) {
        return `You have gathered research data. Synthesize the findings and call ${firstWriteTool} to save the output.${urgency} Do NOT search again.`;
      }
    }
    if (missingTools.length === 1) {
      return `Your next step: call ${missingTools[0]}. You have all the information you need.${urgency}`;
    }
    return `Complete these steps in order: ${missingTools.join(" \u2192 ")}.${urgency} Proceed with the first one now.`;
  },
  /** Suggests a recovery path after a tool error (404, timeout, or generic). */
  errorRecovery({ toolName, errorContent, missingTools, tier }) {
    if (tier !== "local") return void 0;
    if (errorContent.includes("404") || errorContent.includes("Not Found")) {
      const reminder = missingTools.length > 0 ? ` You still need to call: ${missingTools.join(", ")}.` : "";
      return `${toolName} returned 404 \u2014 that URL doesn't exist. Try a different URL or use web-search to find the correct one.${reminder}`;
    }
    if (errorContent.toLowerCase().includes("timeout")) {
      return `${toolName} timed out. Try again with a simpler request, or skip this step and proceed with what you have.`;
    }
    return `${toolName} failed. Try an alternative approach or use a different tool to get the information you need.`;
  },
  /** Tells the model to stop researching and call the first missing output tool. */
  synthesisPrompt({ missingOutputTools, observationCount, tier }) {
    if (tier !== "local" || missingOutputTools.length === 0) return void 0;
    const plural = observationCount !== 1 ? "s" : "";
    return `You have gathered ${observationCount} piece${plural} of information. That is enough. Do NOT search again. Now call ${missingOutputTools[0]} to produce the final output. Synthesize everything you have learned into a complete, well-structured response.`;
  },
  /** Final check: flags required tools not yet used, else asks for a self-review. */
  qualityCheck({ task, requiredTools, toolsUsed, tier }) {
    if (tier !== "local") return void 0;
    const unmet = requiredTools.filter((t) => !toolsUsed.has(t));
    if (unmet.length === 0) {
      return `Review your answer: does it fully address the task "${task.slice(0, 120)}"? If yes, give it. If not, complete the missing parts first.`;
    }
    return `Before finishing: you have not yet called ${unmet.join(", ")}. Call ${unmet[0]} now.`;
  }
};
/**
 * Prompt adapter for the "mid" model tier: terser hints than the local adapter.
 */
var midModelAdapter = {
  /**
   * Lists the tools still to be called, with a step countdown once two or
   * fewer iterations remain. With nothing missing it asks for the final answer
   * if any tool was used, and otherwise returns undefined.
   */
  continuationHint({ missingTools, toolsUsed, iteration, maxIterations }) {
    if (missingTools.length > 0) {
      const nearLimit = iteration >= maxIterations - 2;
      const urgency = nearLimit ? ` (${maxIterations - iteration} steps left)` : "";
      return `Still needed: ${missingTools.join(", ")}. Call the next one now.${urgency}`;
    }
    if (toolsUsed.size > 0) {
      return "All required tools called. Synthesize and give your final answer.";
    }
    return void 0;
  },
  /** Points the model at the first missing output tool; applies to tier "mid" only. */
  synthesisPrompt({ missingOutputTools, tier }) {
    const applies = tier === "mid" && missingOutputTools.length > 0;
    if (!applies) return void 0;
    return `Research complete. Now call ${missingOutputTools[0]} to produce the output.`;
  }
};
/**
 * Chooses the prompt adapter for a model tier.
 *
 * @param _capabilities - Provider capabilities; currently not consulted.
 * @param tier - Model tier; "local" and "mid" have dedicated adapters.
 * @returns The matching adapter, or `defaultAdapter` for any other tier.
 */
function selectAdapter(_capabilities, tier) {
  switch (tier) {
    case "local":
      return localModelAdapter;
    case "mid":
      return midModelAdapter;
    default:
      return defaultAdapter;
  }
}
/**
 * Placeholder for tier-based strategy recommendation.
 *
 * All three arguments are ignored and the function always returns undefined,
 * i.e. it never overrides the configured strategy.
 *
 * @param _tier - Model tier (unused).
 * @param _configuredStrategy - Strategy currently configured (unused).
 * @param _requiredTools - Tools required by the task (unused).
 * @returns Always undefined.
 */
function recommendStrategyForTier(_tier, _configuredStrategy, _requiredTools) {
  const noRecommendation = void 0;
  return noRecommendation;
}
3580
4379
  export {
3581
4380
  AnthropicProviderLive,
3582
4381
  CacheControlSchema,
3583
4382
  CompletionResponseSchema,
3584
4383
  ComplexityAnalysisSchema,
4384
+ DEFAULT_CAPABILITIES,
3585
4385
  DefaultEmbeddingConfig,
3586
4386
  EmbeddingConfigSchema,
4387
+ FallbackChain,
3587
4388
  GeminiProviderLive,
3588
4389
  ImageContentBlockSchema,
3589
4390
  ImageSourceSchema,
@@ -3621,13 +4422,22 @@ export {
3621
4422
  calculateCost,
3622
4423
  createLLMProviderLayer,
3623
4424
  createLLMProviderLayerWithConfig,
4425
+ defaultAdapter,
3624
4426
  defaultCircuitBreakerConfig,
3625
4427
  estimateTokenCount,
3626
4428
  getProviderDefaultModel,
3627
4429
  llmConfigFromEnv,
4430
+ localModelAdapter,
3628
4431
  makeCacheable,
3629
4432
  makeCircuitBreaker,
3630
4433
  makeEmbeddingCache,
3631
- retryPolicy
4434
+ makeRateLimitedProvider,
4435
+ makeRateLimiter,
4436
+ openRouterPricingProvider,
4437
+ recommendStrategyForTier,
4438
+ retryPolicy,
4439
+ selectAdapter,
4440
+ urlPricingProvider,
4441
+ validateAndRepairMessages
3632
4442
  };
3633
4443
  //# sourceMappingURL=index.js.map